Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

......@@ -154,7 +154,7 @@ if __name__ == "__main__":
line = ''
for w, l in zip(word.split('-'), lemma.split('-')):
line += w + '\t' + listLine1[1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
line.rstrip('\r\n')
line = line.rstrip('\n')
print("Line: {}".format(line))
if not found:
line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
......@@ -164,10 +164,7 @@ if __name__ == "__main__":
line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag'
# line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
#line = word + '\t' + termTag + '\t' + lemma + ' ' + termTag + ' TermTag'
if found:
oFile.write(line)
else:
oFile.write(line + '\n')
oFile.write(line + '\n')
filesPreprocessed += 1
# Print processed files
......