filter-v03.py
1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# import fileinput
# import regex as re
# from regex import finditer
import sys
import json
# Exit statuses reported to the caller.
EXIT_USAGE = 2      # required command-line arguments are missing
EXIT_NONE = 10      # no normalized effect found in a noun or verb token
EXIT_BOTH = 11      # deverbal and verbal occurrences both found
EXIT_DEVERBAL = 12  # only deverbal occurrences found
EXIT_VERBAL = 13    # only verbal occurrences found


def split_tagged_token(token):
    """Split a ``word/TAG`` token into ``(word, tag)``.

    The tag is whatever follows the LAST slash, so a word that itself
    contains a slash (``and/or/CC``) is kept whole.  A token without any
    slash carries no tag and is returned as ``(token, "")`` instead of
    being sliced with the -1 that ``str.find`` reports for "not found".
    """
    word, slash, tag = token.rpartition("/")
    if not slash:
        return token, ""
    return word, tag


def classify_effects(tagged_text, effects):
    """Look for normalized effects inside the words of a POS-tagged text.

    2018-11-30 CMC: nouns and past participles are processed as deverbal,
    and all verb forms as verbal:

        VRB: VB   verb, base form                          think
        VRB: VBZ  verb, 3rd person singular present        she thinks
        VRB: VBP  verb, non-3rd person singular present    I think
        VRB: VBD  verb, past tense                         they thought
        DEV: VBN  verb, past participle                    a sunken ship
        VRB: VBG  verb, gerund or present participle       thinking is fun

    Args:
        tagged_text: whitespace-separated ``word/TAG`` tokens,
            e.g. ``extend/VBP``.
        effects: iterable of effect strings; an effect matches a token
            when it is a substring of the token's word.

    Returns:
        ``(is_dev, is_vrb)``.  ``is_dev`` is True when an effect occurs in
        a word whose tag contains ``NN`` or equals ``VBN``; ``is_vrb`` is
        True when an effect occurs in a word whose tag contains ``VB``.
        Because ``VBN`` contains ``VB``, a matching past participle sets
        both flags.
    """
    effects = list(effects)  # may be a one-shot iterable; it is scanned per token
    is_dev = False
    is_vrb = False
    # The text is tokenized once, not once per dictionary entry.
    for token in tagged_text.split():
        word, tag = split_tagged_token(token)
        deverbal_tag = "NN" in tag or tag == "VBN"
        verbal_tag = "VB" in tag
        if not (deverbal_tag or verbal_tag):
            continue
        if not any(effect in word for effect in effects):
            continue
        is_dev = is_dev or deverbal_tag
        is_vrb = is_vrb or verbal_tag
        if is_dev and is_vrb:
            break  # nothing left to learn from the remaining tokens
    return is_dev, is_vrb


def exit_code_for(is_dev, is_vrb):
    """Map the two classification flags to the script's exit status."""
    if is_dev and is_vrb:
        return EXIT_BOTH
    if is_dev:
        return EXIT_DEVERBAL
    if is_vrb:
        return EXIT_VERBAL
    return EXIT_NONE


def main(argv):
    """Run the filter and return the process exit status.

    Args:
        argv: ``[program, input_file, normalized_effects_json]``.  The
            input file holds the POS-tagged text.  The second file is a
            JSON object whose keys are the normalized effects (the
            original version read a plain-text list, one entry per line).

    Returns:
        2 when the required arguments are missing, otherwise 10, 11, 12
        or 13 as described by the ``EXIT_*`` constants.
    """
    if len(argv) != 3:
        sys.stderr.write("E: usage: " + argv[0] + " <input_file> <normalized_Effects> \n")
        sys.stderr.flush()
        # With too few arguments there is nothing to open, so stop here
        # rather than failing below with an IndexError traceback.  Extra
        # arguments only trigger the warning, as they always have.
        if len(argv) < 3:
            return EXIT_USAGE

    # Read the input file.
    with open(argv[1], "r") as text_file:
        tagged_text = text_file.read()

    # Load the normalized effects dictionary.
    with open(argv[2]) as dicc_file:
        normalized_effects = json.load(dicc_file)

    is_dev, is_vrb = classify_effects(tagged_text, normalized_effects.keys())
    return exit_code_for(is_dev, is_vrb)


if __name__ == "__main__":
    sys.exit(main(sys.argv))