# evaluate-ris-gcs-standoff-v04.py 28.6 KB
#
# Raw Blame History Permalink

# -*- coding: UTF-8 -*-
import operator
from optparse import OptionParser
import os
import sys
import json
import re

__author__ = 'CMendezC'


# Objective: evaluate predicted interactions in standoff format
# versus true interactions in tab format
# v04: add synonyms of TFs

# Parameters:
#   1) --truePath Path for true interactions
#   2) --trueFile File for true interactions
#   3) --predictedPath Path for predicted interactions
#   4) --outputPath Output path
#   5) --outputFile File for saving results
#   6) --evaluateGCs Evaluate with GCs
#   7) --diccPath Dictionary path
#   8) --diccSynon File with synonyms of TFs

# Output:
#   1) File with TP, FP, FN and scores Precision, Recall, F1

# Execution:
# python3.4 evaluate-ris-gcs-standoff.py
# --truePath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/dataSets/analysis-validation-data-sets
# --trueFile ris-analysis-reference.txt
# --predictedPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/predicted-ris-gcs
# --outputPath /home/cmendezc/bitbucket_repositories/automatic-extraction-ris-gcs/rie-gce-system/automatic-extraction-ris-gcs/evaluation-reports
# --outputFile evaluation-riegce-system-ris-analysis.txt
# --diccPath /home/cmendezc/terminologicalResources
# --diccSynon diccionario-STM-LT2-v7.0.SYNONYMS.json
# --evaluateGCs

###########################################################
#                       MAIN PROGRAM                      #
###########################################################

def updateHashPredicted(pr, hashP, pm, sF, ef):
    """Register one occurrence of a predicted interaction, updating ``hashP`` in place.

    Each entry of ``hashP`` has the shape
    ``{"pmids": {pm: [sF, ...]}, "orieff": ef}``.

    :param pr: key of the predicted interaction.
    :param hashP: dictionary that accumulates the occurrences.
    :param pm: article identifier under which the sentence is filed.
    :param sF: sentence file name to append.
    :param ef: original effect; stored only when ``pr`` is seen for the first time.
    :return: None.
    """
    if pr in hashP:
        # Known interaction: append the sentence under its article, creating
        # the article's list on first sight. The stored "orieff" is kept.
        hashP[pr]["pmids"].setdefault(pm, []).append(sF)
        return
    hashP[pr] = {"pmids": {pm: [sF]}, "orieff": ef}


def getSummary(r, hashTemp):
    """Build a one-line summary of the evidence recorded for interaction ``r``.

    :param r: interaction key to look up.
    :param hashTemp: dictionary whose entries have the shape
        ``{"pmids": {pmid: [sentence, ...]}, "orieff": effect}``.
    :return: a tab-separated line with the number of articles, the total
        number of sentences and the original effect, or a warning string
        when ``r`` is not in ``hashTemp``.
    """
    if r not in hashTemp:
        return "WARNING: no data available!"
    record = hashTemp[r]
    sentences_by_pmid = record["pmids"]
    n_articles = len(sentences_by_pmid)
    n_sentences = sum(len(sents) for sents in sentences_by_pmid.values())
    return "Artículos: {}\tFrases: {}\tOriginal effect: {}".format(n_articles, n_sentences, record["orieff"])


def getDetail(r, hashTemp):
    """List, per article, how many sentences support interaction ``r``.

    The count is taken per PMID. (The previous implementation never reset its
    counter between PMIDs, so every PMID after the first reported a running
    total instead of its own number of sentences.)

    :param r: interaction key to look up.
    :param hashTemp: dictionary whose entries have the shape
        ``{"pmids": {pmid: [sentence, ...]}, "orieff": effect}``.
    :return: one line ``"\\tPMID <pmid>: <n> frases\\n"`` per article, ordered
        by decreasing number of sentences (ties keep dictionary order); an
        empty string when the entry has no articles; a warning string when
        ``r`` is not in ``hashTemp``.
    """
    if r not in hashTemp:
        return "WARNING: no data available!"
    counts = {pmid: len(sents) for pmid, sents in hashTemp[r]["pmids"].items()}
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    return "".join("\tPMID {}: {} frases\n".format(p, s) for p, s in ranked)


def get_standard_name(regSynon, synonyms=None):
    """Translate a regulator name (or its leading part) into its standard name.

    :param regSynon: regulator name as it was predicted.
    :param synonyms: optional mapping ``synonym -> standard name``. When
        omitted, the module-level ``hashSynon`` dictionary loaded by the
        script is used, which is the original behaviour.
    :return: the standard name when ``regSynon`` is a known synonym;
        otherwise ``regSynon`` with its leading synonym replaced by the
        standard name (e.g. a synonym followed by ``-regulated``), using the
        first synonym, in dictionary iteration order, that is a prefix of
        ``regSynon``; an empty string when nothing matches.
    """
    if synonyms is None:
        synonyms = hashSynon
    if regSynon in synonyms:
        return synonyms[regSynon]
    for syn, std in synonyms.items():
        if regSynon.startswith(syn):
            # Only the leading synonym is swapped; the rest of the word is kept.
            return std + regSynon[len(syn):]
    return ""


def isCorrect(ripr, listT, rtype, useSynonyms=None):
    """Search the reference for an interaction that the prediction ``ripr`` matches.

    Matching rules, tried against each reference interaction in order:

    * ``'Full'``: the predicted regulator (or its standard name, when synonyms
      are enabled) equals the reference regulator, and the regulated entity
      (and, for ``"rief"``, the effect) coincide.
    * ``'Start'``: same, but the predicted regulator (or its standard name)
      only *starts with* the reference regulator, as in ``ArgP-regulated``.
    * ``'Regulator'`` (``"rief"`` only): regulator and regulated entity match
      (fully or by prefix), the predicted effect is the generic
      ``"regulator"`` and the reference effect is ``"activator"`` or
      ``"repressor"``.

    :param ripr: predicted interaction, tab-separated
        (``regulator\\tregulated`` or ``regulator\\tregulated\\teffect``).
    :param listT: reference interactions in the same tab-separated layout.
    :param rtype: ``"ri"`` (regulator, regulated) or ``"rief"`` (plus effect).
        Any other value, including ``"riefgc"``, matches nothing.
    :param useSynonyms: whether to also try the regulator's standard name.
        When omitted, the module-level ``use_synonyms`` flag set by the
        script is used, which is the original behaviour.
    :return: a tuple ``(interaction, match type)``. For ``'Full'`` and
        ``'Start'`` the interaction is the reference one; for ``'Regulator'``
        it is the predicted one (built with the reference regulator when the
        match was by prefix). ``('', '')`` when nothing matches.
    """
    if useSynonyms is None:
        useSynonyms = use_synonyms
    list_ripr = ripr.split('\t')
    regulator = list_ripr[0]
    regulatorStdName = ""
    if useSynonyms:
        regulatorStdName = get_standard_name(regulator)

    for rit in listT:
        listRT = rit.split('\t')
        regulatorT = listRT[0]
        # The reference name is matched literally: it must be escaped, or
        # names containing regex metacharacters would mis-match or fail to
        # compile.
        regexRegulatorStarts = re.compile('(' + re.escape(regulatorT) + ').+')
        sameRegulator = regulator == regulatorT or regulatorStdName == regulatorT
        # Prefix match on the predicted name first, then on its standard name.
        # The predicted regulator itself is never modified, so the outcome for
        # one reference interaction cannot leak into the next iteration.
        startsWithRegulator = bool(regexRegulatorStarts.match(regulator)) or bool(
            useSynonyms and regexRegulatorStarts.match(regulatorStdName))

        if rtype == "ri":
            regulated = list_ripr[1]
            regulatedT = listRT[1]
            if regulated != regulatedT:
                continue
            if sameRegulator:
                return (rit, 'Full')
            if startsWithRegulator:
                return (rit, 'Start')
        elif rtype == "rief":
            effect = list_ripr[2]
            regulated = list_ripr[1]
            effectT = listRT[2]
            regulatedT = listRT[1]
            if regulated != regulatedT:
                continue
            sameEffect = effect == effectT
            # A generic "regulator" prediction is accepted when the reference
            # states the direction of the regulation.
            genericEffect = effect == "regulator" and (
                effectT == "activator" or effectT == "repressor")
            if sameRegulator:
                if sameEffect:
                    return (rit, 'Full')
                if genericEffect:
                    # The predicted interaction is returned (not the reference
                    # one) so that the output lists keep what was predicted.
                    return (ripr, 'Regulator')
            if startsWithRegulator:
                if sameEffect:
                    return (rit, 'Start')
                if genericEffect:
                    # Same as above, but only the matched regulator is kept.
                    return (regulatorT + '\t' + regulated + '\t' + effect, 'Regulator')
        # TODO: "riefgc" (interaction with growth condition) is not evaluated
        # yet; such calls fall through and report no match.
    return ('', '')


def get_scores_rules(listTrue, listPredicted, hashTemp, title, ri_type):
    """Score predictions against the reference using the matching rules of isCorrect.

    Progress and results are printed, and a report is appended to the output
    file given on the command line (``options.outputPath`` /
    ``options.outputFile``).

    * Precision = correct predictions / predicted interactions.
    * Recall = recovered reference interactions / reference interactions.
      Recall is a recovery rate: only 'Full' and 'Start' matches recover a
      reference interaction, whereas 'Regulator' matches count as correct
      predictions without recovering anything.
    * F1 = 2 * ((Precision * Recall) / (Precision + Recall)).

    A score whose denominator is zero (no predictions, empty reference, or
    precision + recall equal to 0) is reported as 0.0 instead of raising
    ZeroDivisionError.

    :param listTrue: reference interactions (tab-separated strings).
    :param listPredicted: predicted interactions (tab-separated strings).
    :param hashTemp: evidence recorded per predicted interaction; currently
        not used by this function.
    :param title: heading printed and written before the scores.
    :param ri_type: interaction type forwarded to isCorrect.
    :return: None.
    """
    print("Evaluation")
    correct = 0
    incorrect = 0
    # Verdict ("correct" / "incorrect") of every distinct predicted interaction
    hashPredicted = {}

    predicted = len(listPredicted)
    print("len(listPredicted): {}".format(predicted))
    reference = len(listTrue)

    # Reference interactions matched by a 'Full' or 'Start' prediction
    listRecovered = []
    for ri_pred in listPredicted:
        print("ri_pred: {}".format(ri_pred))
        riResult, matchType = isCorrect(ri_pred, listTrue, ri_type)
        if riResult != '':
            hashPredicted.setdefault(ri_pred, "correct")
            print("ri_pred {} correct".format(ri_pred))
            correct += 1
            # Complete matching, or the predicted regulator starts with the entity
            if matchType == 'Full' or matchType == 'Start':
                if riResult in listRecovered:
                    print("WARNING: riResult {} already in listRecovered".format(riResult))
                else:
                    listRecovered.append(riResult)
        else:
            incorrect += 1
            hashPredicted.setdefault(ri_pred, "incorrect")
            print("ri_pred {} incorrect".format(ri_pred))

    # Consistency checks: they only report, they never stop the evaluation.
    if len(hashPredicted) != predicted:
        print("ERROR: number of predicted RIs mismatch")
    print("Predicted: {}".format(predicted))
    print("len(hashPredicted): {}".format(len(hashPredicted)))

    cor = sum(1 for v in hashPredicted.values() if v == "correct")
    inc = sum(1 for v in hashPredicted.values() if v == "incorrect")
    if cor != correct:
        print("ERROR: number of correct RIs mismatch")
    if inc != incorrect:
        print("ERROR: number of incorrect RIs mismatch")
    print("Correct: {}".format(correct))
    print("Incorrect: {}".format(incorrect))

    unrecovered = 0
    # Only reference interactions that coincide with a prediction, i.e. without
    # counting generic "regulator" predictions accepted against an
    # activator/repressor reference.
    recovered = 0
    listRecovered2 = []
    listUnrecovered = []
    for ri in listTrue:
        if ri not in listRecovered:
            if ri in listUnrecovered:
                print("WARNING: ri {} already in listUnrecovered".format(ri))
            else:
                listUnrecovered.append(ri)
                unrecovered += 1
        else:
            if ri in listRecovered2:
                print("WARNING: ri {} already in listRecovered2".format(ri))
            else:
                listRecovered2.append(ri)
                recovered += 1

    print("Len listRecovered: {}".format(len(listRecovered)))
    print("Len listRecovered2: {}".format(len(listRecovered2)))
    print("Len listUnrecovered: {}".format(len(listUnrecovered)))

    print("{}".format(title))
    print("Predicted: {}".format(predicted))
    print("Reference: {}".format(reference))
    print("Unrecovered: {}".format(unrecovered))
    print("Recovered: {}".format(recovered))

    # Guard every denominator: an empty prediction set or reference, or no
    # correct prediction at all, must yield a score of 0 rather than a crash.
    precision = correct / predicted if predicted else 0.0
    print("Precision = correct / predicted: {}".format(precision))
    recall = recovered / reference if reference else 0.0
    print("Recall = recovered / reference: {}".format(recall))
    if precision + recall:
        f1 = 2 * ((precision * recall) / (precision + recall))
    else:
        f1 = 0.0
    print("F1: {}".format(f1))

    with open(os.path.join(options.outputPath, options.outputFile), mode="a", errors="replace") as oFile:
        oFile.write("{}\n".format(title))
        oFile.write("Predicted: {}\n".format(predicted))
        oFile.write("Reference: {}\n".format(reference))
        oFile.write("Correct: {}\n".format(correct))
        oFile.write("Incorrect: {}\n".format(incorrect))
        oFile.write("Unrecovered: {}\n".format(unrecovered))
        oFile.write("Recovered: {}\n".format(recovered))
        oFile.write("Precision = correct / predicted: {}\n".format(precision))
        oFile.write("Recall = recovered / reference: {}\n".format(recall))
        oFile.write("F1: {}\n".format(f1))
        oFile.write("Unrecovered instances:\n")
        for r in sorted(listUnrecovered):
            oFile.write("\tUnrecovered: {}\n".format(r))
        oFile.write("Recovered instances:\n")
        for r in sorted(listRecovered):
            oFile.write("\tRecovered: {}\n".format(r))
        oFile.write("Incorrect instances:\n")
        for r, v in sorted(hashPredicted.items()):
            if v == "incorrect":
                oFile.write("\tIncorrect: {}\n".format(r))
        oFile.write("Correct instances:\n")
        for r, v in sorted(hashPredicted.items()):
            if v == "correct":
                oFile.write("\tCorrect: {}\n".format(r))


def get_scores(listTrue, listPredicted, hashTemp, title):
    """Score predictions against the reference by exact string comparison.

    Results are printed and a report is appended to the output file given on
    the command line (``options.outputPath`` / ``options.outputFile``).

    * Precision = correct / predicted.
    * Recall = correct / reference.
    * F1 = 2 * ((Precision * Recall) / (Precision + Recall)).

    A ratio whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError.

    :param listTrue: reference interactions (tab-separated strings).
    :param listPredicted: predicted interactions (tab-separated strings).
    :param hashTemp: evidence recorded per predicted interaction; used to
        write the summary and the per-article detail of each correct one.
    :param title: heading printed and written before the scores.
    :return: None.
    """
    nPredicted = len(listPredicted)
    nReference = len(listTrue)
    setTrue = set(listTrue)
    setPredicted = set(listPredicted)
    correct = setTrue & setPredicted
    incorrect = setPredicted - setTrue
    unrecovered = setTrue - setPredicted

    def ratio(part, total):
        # Fraction of ``total`` covered by ``part``; 0.0 when ``total`` is 0.
        return part / total if total else 0.0

    print("{}".format(title))
    print("Predicted: {}".format(nPredicted))
    print("Reference: {}".format(nReference))
    print("Correct: {} ({})".format(len(correct), ratio(len(correct), nPredicted)))
    print("Incorrect: {} ({})".format(len(incorrect), ratio(len(incorrect), nPredicted)))
    # The placeholder for the ratio was missing here, so it was never printed.
    print("Unrecovered: {} ({})".format(len(unrecovered), ratio(len(unrecovered), nReference)))
    precision = ratio(len(correct), nPredicted)
    print("Precision: {}".format(precision))
    recall = ratio(len(correct), nReference)
    print("Recall: {}".format(recall))
    f1 = 2 * ratio(precision * recall, precision + recall)
    print("F1: {}".format(f1))

    with open(os.path.join(options.outputPath, options.outputFile), mode="a") as oFile:
        oFile.write("{}\n".format(title))
        oFile.write("Predicted: {}\n".format(nPredicted))
        oFile.write("Reference: {}\n".format(nReference))
        oFile.write("Correct: {}\n".format(len(correct)))
        oFile.write("Incorrect: {}\n".format(len(incorrect)))
        oFile.write("Unrecovered: {}\n".format(len(unrecovered)))
        oFile.write("Precision: {}\n".format(precision))
        oFile.write("Recall: {}\n".format(recall))
        oFile.write("F1: {}\n".format(f1))
        oFile.write("Correct instances:\n")
        for r in sorted(correct):
            oFile.write("\t{}\t{}\n".format(r, getSummary(r, hashTemp)))
            oFile.write("\t{}\n".format(getDetail(r, hashTemp)))
        oFile.write("Incorrect instances:\n")
        for r in sorted(incorrect):
            oFile.write("\t{}\n".format(r))
        oFile.write("Unrecovered instances:\n")
        for r in sorted(unrecovered):
            oFile.write("\t{}\n".format(r))


if __name__ == "__main__":
    # Script entry point: parse the options, load the optional synonym
    # dictionary, read the reference and the predicted interactions, and
    # write the evaluation report.
    # NOTE: options, use_synonyms and hashSynon are read as module-level
    # names by the functions above, so they must stay at module level.

    # Parameter definition
    parser = OptionParser()
    parser.add_option("--truePath", dest="truePath",
                      help="Path true ris gcs", metavar="PATH")
    parser.add_option("--trueFile", dest="trueFile",
                      help="File true ris gcs", metavar="FILE")
    parser.add_option("--predictedPath", dest="predictedPath",
                      help="Path predicted ris gcs", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Output path", metavar="PATH")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File for saving results", metavar="FILE")
    parser.add_option("--evaluateGCs", default=False,
                      action="store_true", dest="evaluateGCs",
                      help="Evaluate GCs?")
    parser.add_option("--diccPath", dest="diccPath",
                      help="Path to dictionary", metavar="PATH")
    parser.add_option("--diccSynon", dest="diccSynon",
                      help="File with synonyms", metavar="FILE")

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # Positional arguments are not accepted. parser.error() prints the
        # message and terminates the process itself.
        parser.error("None parameter entered.")

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path true ris gcs: " + str(options.truePath))
    print("File true ris gcs: " + str(options.trueFile))
    print("Path predicted ris gcs: " + str(options.predictedPath))
    print("Output path: " + str(options.outputPath))
    print("File for saving results: " + str(options.outputFile))
    print("Evaluate GCs: " + str(options.evaluateGCs))
    print("Path to dictionary: " + str(options.diccPath))
    print("File with synonyms: " + str(options.diccSynon))

    use_synonyms = False
    hashSynon = {}
    # Synonyms need both the dictionary path and a file name other than the
    # "no-synonyms" marker.
    if (options.diccPath is not None and options.diccSynon is not None
            and options.diccSynon != "no-synonyms"):
        print("***** Using synonyms *****")
        use_synonyms = True
        print('Loading dictionary of synonyms...')
        with open(os.path.join(options.diccPath, options.diccSynon)) as diccSynon:
            hashSynon = json.load(diccSynon)
        print('Loading dictionary of synonyms {}... done!'.format(len(hashSynon)))

    listTrueRI = []  # Without effect nor gc
    listTrueRIEF = []  # With effect, without gc
    if options.evaluateGCs:
        listTrueRIEFGC = []  # With effect and gc
    # Read and process the reference. Lines with more than four tab-separated
    # columns carry regulator, regulated, effect (and gc) from the third
    # column on; shorter lines carry them from the first column on.
    with open(os.path.join(options.truePath, options.trueFile), mode="r", encoding="utf-8") as iFile:
        for line in iFile:
            line = line.strip('\n')
            if line.startswith("#"):
                continue
            if not line.strip():
                # Blank lines carry no interaction
                continue
            listElem = line.split('\t')
            if len(listElem) > 4:
                regulator = listElem[2]
                regulated = listElem[3]
                effect = listElem[4]
                if options.evaluateGCs:
                    gc = listElem[5]
            else:
                regulator = listElem[0]
                regulated = listElem[1]
                effect = listElem[2]
                if options.evaluateGCs:
                    gc = listElem[3]
            # "binding" is evaluated as the generic effect "regulator"
            if effect == "binding":
                effect = "regulator"
            ri = "{}\t{}".format(regulator, regulated)
            if ri not in listTrueRI:
                listTrueRI.append(ri)
            rief = "{}\t{}\t{}".format(regulator, regulated, effect)
            if rief not in listTrueRIEF:
                listTrueRIEF.append(rief)
            if options.evaluateGCs:
                riefgc = "{}\t{}\t{}\t{}".format(regulator, regulated, effect, gc)
                if riefgc not in listTrueRIEFGC:
                    listTrueRIEFGC.append(riefgc)
    print("   RIs en referencia antes regulators: {}".format(len(listTrueRI)))
    print("   RIEFs en referencia antes regulators: {}".format(len(listTrueRIEF)))
    if options.evaluateGCs:
        print("   RIEFGCs en referencia antes regulators: {}".format(len(listTrueRIEFGC)))

    # Eliminate those RIs with regulator which also have RIs with activator or repressor
    listRITemp = []
    for ri in listTrueRIEF:
        listRI = ri.split('\t')
        regulator = listRI[0]
        regulated = listRI[1]
        effect = listRI[2]
        if effect == "regulator":
            tempRIA = "{}\t{}\t{}".format(regulator, regulated, "activator")
            tempRIR = "{}\t{}\t{}".format(regulator, regulated, "repressor")
            if tempRIA in listTrueRIEF or tempRIR in listTrueRIEF:
                continue
        listRITemp.append(ri)
    listTrueRIEF = listRITemp

    print("   RIEFs en referencia después regulators: {}".format(len(listTrueRIEF)))
    if options.evaluateGCs:
        # Same filter for the interactions with growth condition. The kept
        # entries are collected in a new list: removing items from the list
        # being iterated would skip the element after each removal.
        listRIGCTemp = []
        for ri in listTrueRIEFGC:
            listRI = ri.split('\t')
            regulator = listRI[0]
            regulated = listRI[1]
            effect = listRI[2]
            gc = listRI[3]
            if effect == "regulator":
                tempRIGCA = "{}\t{}\t{}\t{}".format(regulator, regulated, "activator", gc)
                tempRIGCR = "{}\t{}\t{}\t{}".format(regulator, regulated, "repressor", gc)
                if tempRIGCA in listTrueRIEFGC or tempRIGCR in listTrueRIEFGC:
                    continue
            listRIGCTemp.append(ri)
        listTrueRIEFGC = listRIGCTemp
        print("   RIEFGCs en referencia después regulators: {}".format(len(listTrueRIEFGC)))

    listPredictedRI = []
    hashPredictedRI = {}
    listPredictedRIEF = []
    hashPredictedRIEF = {}
    if options.evaluateGCs:
        listPredictedRIEFGC = []
        hashPredictedRIEFGC = {}
    # Base name (without ".a1") of every annotation file -> directory where it
    # was first found. The directory is kept because os.walk also descends
    # into subdirectories of the predicted path.
    hashFiles = {}
    for path, dirs, files in os.walk(options.predictedPath):
        for fileName in files:
            if fileName.endswith(".a1"):
                hashFiles.setdefault(fileName[:-3], path)
    print("Files: {}".format(len(hashFiles)))

    processedFiles = 0
    for file in sorted(hashFiles.keys()):
        print("File: {}".format(file))
        fileDir = hashFiles[file]
        # The PMID is the part of the file name before the first "_"; the
        # sentence file name is the part before the first "-" after it.
        pmid = file[:file.find("_")]
        sentenceFile = file[:file.find("-", file.find("_"))] + ".txt"
        hashEntities = {}
        hashOriginalEffect = {}
        # .a1: entities. Tab-separated: id, "TYPE ..." (space-separated), text.
        with open(os.path.join(fileDir, file + ".a1"), mode="r") as a1File:
            for line in a1File:
                line = line.strip('\n')
                if not line.strip():
                    continue
                listLine1 = line.split('\t')
                listLine2 = listLine1[1].split(' ')
                entity = listLine2[0]
                idEntity = listLine1[0]
                originalEffect = listLine1[2]
                if entity.startswith("EFFECT"):
                    # Effect entities are represented by the part of their
                    # type after the ".", without the "_dev" marker; the
                    # annotated text is kept as the original effect.
                    entity = entity[entity.find(".") + 1:]
                    print("Entity: {}".format(entity))
                    entity = entity.replace("_dev", "")
                    print("Entity without _dev: {}".format(entity))
                    if idEntity not in hashOriginalEffect:
                        hashOriginalEffect[idEntity] = originalEffect
                else:
                    # Any other entity is represented by its annotated text
                    entity = listLine1[2]
                if idEntity not in hashEntities:
                    hashEntities[idEntity] = entity
        print("hashEntities: {}".format(hashEntities))

        # .a2: relations, e.g.
        # R1	Interaction.T3 Target:T2 Agent:T1 Condition: T4
        with open(os.path.join(fileDir, file + ".a2"), mode="r") as a2File:
            for line in a2File:
                line = line.strip('\n')
                if not line.strip():
                    continue
                listLine1 = line.split('\t')
                listLine2 = listLine1[1].split(' ')
                regulator = listLine2[2]
                regulator = regulator[regulator.find(":") + 1:]
                regulated = listLine2[1]
                regulated = regulated[regulated.find(":") + 1:]
                effect = listLine2[0]
                effect = effect[effect.find(".") + 1:]
                if options.evaluateGCs:
                    gc = listLine2[3]
                    gc = gc[gc.find(":") + 1:]

                pri = "{}\t{}".format(hashEntities[regulator], hashEntities[regulated])
                if pri not in listPredictedRI:
                    listPredictedRI.append(pri)
                updateHashPredicted(pri, hashPredictedRI, pmid, sentenceFile, None)

                prief = "{}\t{}\t{}".format(hashEntities[regulator], hashEntities[regulated], hashEntities[effect])
                print("prief: {}".format(prief))
                if prief not in listPredictedRIEF:
                    listPredictedRIEF.append(prief)
                updateHashPredicted(prief, hashPredictedRIEF, pmid, sentenceFile, hashOriginalEffect[effect])

                if options.evaluateGCs:
                    priefgc = "{}\t{}\t{}\t{}".format(hashEntities[regulator], hashEntities[regulated],
                                                      hashEntities[effect], hashEntities[gc])
                    if priefgc not in listPredictedRIEFGC:
                        listPredictedRIEFGC.append(priefgc)
                    updateHashPredicted(priefgc, hashPredictedRIEFGC, pmid, sentenceFile, hashOriginalEffect[effect])
        processedFiles += 1

    print("Processed files: {}".format(processedFiles))
    # Start from an empty report: the scoring functions append to it.
    with open(os.path.join(options.outputPath, options.outputFile), mode="w"):
        pass
    get_scores_rules(listTrueRIEF, listPredictedRIEF, hashPredictedRIEF,
                     "Scores regulator-regulated-effect (without gc)", "rief")
    get_scores_rules(listTrueRI, listPredictedRI, hashPredictedRI, "Scores regulator-regulated (without effect nor gc)",
                     "ri")
    if options.evaluateGCs:
        # NOTE(review): isCorrect only implements the "ri" and "rief" rules,
        # so every "riefgc" prediction is currently reported as incorrect.
        get_scores_rules(listTrueRIEFGC, listPredictedRIEFGC, hashPredictedRIEFGC,
                         "Scores regulator-regulated-effect-gc", "riefgc")