Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

...@@ -143,19 +143,19 @@ if __name__ == "__main__": ...@@ -143,19 +143,19 @@ if __name__ == "__main__":
143 if word.find('-') > -1: 143 if word.find('-') > -1:
144 found = False 144 found = False
145 repetitions = word.count('-') 145 repetitions = word.count('-')
146 - #print("repetitions: {}".format(repetitions)) 146 + print("repetitions: {}".format(repetitions))
147 wordOrig = word 147 wordOrig = word
148 for i in range(0, repetitions): 148 for i in range(0, repetitions):
149 wordOrig = wordOrig.replace('-', ' ', 1) 149 wordOrig = wordOrig.replace('-', ' ', 1)
150 - #print("Word: {}".format(wordOrig)) 150 + print("Word: {}".format(wordOrig))
151 if wordOrig in hashTermsOrig[termTag]: 151 if wordOrig in hashTermsOrig[termTag]:
152 - #print("WordOrig: {}".format(wordOrig)) 152 + print("WordOrig: {}".format(wordOrig))
153 found = True 153 found = True
154 line = '' 154 line = ''
155 for w, l in zip(word.split('-'), lemma.split('-')): 155 for w, l in zip(word.split('-'), lemma.split('-')):
156 line += w + '\t' + listLine1[1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n' 156 line += w + '\t' + listLine1[1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
157 line = line.rstrip('\n') 157 line = line.rstrip('\n')
158 - print("Line: {}".format(line)) 158 + #print("Line: {}".format(line))
159 if not found: 159 if not found:
160 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 160 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
161 else: 161 else:
......