Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

...@@ -136,6 +136,7 @@ if __name__ == "__main__": ...@@ -136,6 +136,7 @@ if __name__ == "__main__":
136 if len(word) > 1: 136 if len(word) > 1:
137 for termTag in hashTerms: 137 for termTag in hashTerms:
138 if word in hashTerms[termTag]: 138 if word in hashTerms[termTag]:
139 + if word.find('-') > -1:
139 wordOrig = word.replace('-', ' ') 140 wordOrig = word.replace('-', ' ')
140 #print("Word: {}".format(word)) 141 #print("Word: {}".format(word))
141 if wordOrig in hashTermsOrig[termTag]: 142 if wordOrig in hashTermsOrig[termTag]:
...@@ -146,7 +147,8 @@ if __name__ == "__main__": ...@@ -146,7 +147,8 @@ if __name__ == "__main__":
146 line.rstrip('\n') 147 line.rstrip('\n')
147 else: 148 else:
148 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 149 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
149 - #line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 150 + else:
151 + line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
150 else: 152 else:
151 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag' 153 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag'
152 # line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 154 # line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
......