# filter-v03.py 1.96 KB
# import fileinput
# import regex as re
# from regex import finditer
import sys
import json

# Exit codes reported to the caller.
EXIT_USAGE = 2      # too few command-line arguments
EXIT_NONE = 10      # no dictionary entry found as deverbal or verbal
EXIT_BOTH = 11      # both deverbal and verbal matches found
EXIT_DEVERBAL = 12  # only deverbal matches found
EXIT_VERBAL = 13    # only verbal matches found


def classify_tokens(text, effects):
    """Scan POS-tagged text for dictionary entries used as deverbals or verbs.

    *text* is a whitespace-separated sequence of ``word/TAG`` tokens (for
    example ``extend/VBP``).  *effects* is an iterable of strings; an entry
    matches a token when it is a substring of the token's word part.

    2018-11-30 CMC: nouns and only the past participle are treated as
    deverbal, and all verb forms as verbal:

        VRB: VB   verb, base form                         think
        VRB: VBZ  verb, 3rd person singular present       she thinks
        VRB: VBP  verb, non-3rd person singular present   I think
        VRB: VBD  verb, past tense                        they thought
        DEV: VBN  verb, past participle                   a sunken ship
        VRB: VBG  verb, gerund or present participle      thinking is fun

    NOTE(review): the verbal test is ``"VB" in tag``, which is also true for
    ``VBN``; a matching past participle therefore sets both flags.  This is
    the historical behavior and is preserved here -- confirm it is intended.

    Returns a ``(is_dev, is_vrb)`` tuple of booleans.
    """
    effects = list(effects)
    is_dev = False
    is_vrb = False

    # Split the text once instead of once per dictionary entry.
    for token in text.split():
        # The word is everything before the first "/", the tag everything
        # after it.  A token without "/" carries no tag and cannot be
        # classified, so it is skipped.
        word, sep, tag = token.partition("/")
        if not sep:
            continue

        deverbal_tag = ("NN" in tag) or (tag == "VBN")
        verbal_tag = "VB" in tag
        if not (deverbal_tag or verbal_tag):
            continue
        if not any(effect in word for effect in effects):
            continue

        is_dev = is_dev or deverbal_tag
        is_vrb = is_vrb or verbal_tag
        if is_dev and is_vrb:
            # Both flags are already set; nothing further can change.
            break

    return is_dev, is_vrb


def exit_code_for(is_dev, is_vrb):
    """Map the deverbal/verbal flags to the script's exit code."""
    if is_dev and is_vrb:
        return EXIT_BOTH
    if is_dev:
        return EXIT_DEVERBAL
    if is_vrb:
        return EXIT_VERBAL
    return EXIT_NONE


def main(argv=None):
    """Run the filter and return the process exit code.

    *argv* defaults to ``sys.argv`` and is expected to be
    ``[program, input_file, normalized_effects_json]``.  The JSON file must
    contain an object; its keys are the dictionary entries to look for.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 3:
        program = argv[0] if argv else ""
        sys.stderr.write("E: usage: " + program + " <input_file> <normalized_Effects> \n")
        sys.stderr.flush()
        if len(argv) < 3:
            # Without both file names the script cannot continue; exit
            # cleanly instead of failing with an IndexError below.
            return EXIT_USAGE
        # Extra arguments are ignored, as before.

    # Read the input file.
    with open(argv[1], "r") as text_file:
        dato = text_file.read()

    # Load the normalized effects; only the keys are used.
    with open(argv[2]) as dicc_file:
        hash_normalized_effects = json.load(dicc_file)
    dicc = list(hash_normalized_effects.keys())

    is_dev, is_vrb = classify_tokens(dato, dicc)
    return exit_code_for(is_dev, is_vrb)


if __name__ == "__main__":
    sys.exit(main())