Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

......@@ -108,7 +108,7 @@ if __name__ == "__main__":
hashTermsOrig[key].append(line.capitalize())
print(' Terms read {} size: {}'.format(key, len(hashTerms[key])))
print(' Terms read {} size: {}'.format(key, len(hashTermsOrig[key])))
print("hashTermsOrig: {}".format(hashTermsOrig))
#print("hashTermsOrig: {}".format(hashTermsOrig))
#regularWords = words.words('en')
print()
......@@ -164,6 +164,9 @@ if __name__ == "__main__":
line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag'
# line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
#line = word + '\t' + termTag + '\t' + lemma + ' ' + termTag + ' TermTag'
if found:
oFile.write(line)
else:
oFile.write(line + '\n')
filesPreprocessed += 1
......