Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

...@@ -136,17 +136,19 @@ if __name__ == "__main__": ...@@ -136,17 +136,19 @@ if __name__ == "__main__":
136 if len(word) > 1: 136 if len(word) > 1:
137 for termTag in hashTerms: 137 for termTag in hashTerms:
138 if word in hashTerms[termTag]: 138 if word in hashTerms[termTag]:
139 - wordOrig = word.replace('-', ' ') 139 + if word.find('-') > -1:
140 - #print("Word: {}".format(word)) 140 + wordOrig = word.replace('-', ' ')
141 - if wordOrig in hashTermsOrig[termTag]: 141 + #print("Word: {}".format(word))
142 - print("WordOrig: {}".format(wordOrig)) 142 + if wordOrig in hashTermsOrig[termTag]:
143 - line = '' 143 + print("WordOrig: {}".format(wordOrig))
144 - for w, l in zip(word.split('-'), lemma.split('-')): 144 + line = ''
145 - line += w + '\t' + listLine1[1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n' 145 + for w, l in zip(word.split('-'), lemma.split('-')):
146 - line.rstrip('\n') 146 + line += w + '\t' + listLine1[1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
147 + line.rstrip('\n')
148 + else:
149 + line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
147 else: 150 else:
148 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 151 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
149 - #line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
150 else: 152 else:
151 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag' 153 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag'
152 # line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 154 # line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
......