Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

......@@ -158,33 +158,6 @@ if __name__ == "__main__":
break
#print("Line: {}".format(line))
# Fallback lookups for a hyphenated word; each one is guarded by `not found`.
if not found:
# Strategy 1: for i = repetitions .. 1, replace up to i hyphens with spaces and
# test the result against the collection stored at hashTermsOrig[termTag].
# NOTE(review): wordOrig is reassigned from itself, so replacements accumulate
# across iterations rather than restarting from `word` -- confirm this is intended.
# `repetitions` is defined outside this excerpt (presumably the hyphen count -- verify).
wordOrig = word
for i in range(repetitions, 0, -1):
wordOrig = wordOrig.replace('-', ' ', i)
# Debug output of the candidate being tested.
print("Word: {}".format(wordOrig))
if wordOrig in hashTermsOrig[termTag]:
# Debug output of the candidate that matched.
print("WordOrig: {}".format(wordOrig))
found = True
# Rebuild `line` as one row per hyphen-separated piece of `word`, paired with the
# corresponding piece of `lemma` (zip stops at the shorter of the two lists).
# Row layout: piece, TAB, listLine1[1], TAB, lemma piece, space, termTag, ' TermTag'.
line = ''
for w, l in zip(word.split('-'), lemma.split('-')):
line += w + '\t' + listLine1[
1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
# Drop the newline left after the last row.
line = line.rstrip('\n')
break
if not found:
# Strategy 2: always start from the original `word` and replace only its first
# i hyphens with spaces, for i = 1 .. repetitions.
for i in range(1, repetitions + 1):
wordOrig = word.replace('-', ' ', i)
# Debug output of the candidate being tested.
print("Word: {}".format(wordOrig))
if wordOrig in hashTermsOrig[termTag]:
# Debug output of the candidate that matched.
print("WordOrig: {}".format(wordOrig))
found = True
# Same per-piece row construction as in strategy 1.
line = ''
for w, l in zip(word.split('-'), lemma.split('-')):
line += w + '\t' + listLine1[
1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
# Drop the newline left after the last row.
line = line.rstrip('\n')
break
if not found:
# No variant matched: emit a single row built from listLine1[0], listLine1[1]
# and listLine2[0], followed by termTag and the literal ' TermTag'.
line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
# NOTE(review): the `if` this `else` belongs to is outside the excerpt; the
# assignment below is textually identical to the one just above.
else:
line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
......