Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

...@@ -155,8 +155,35 @@ if __name__ == "__main__":
155 155 for w, l in zip(word.split('-'), lemma.split('-')):
156 156 line += w + '\t' + listLine1[1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
157 157 line = line.rstrip('\n')
158 + break
158 159 #print("Line: {}".format(line))
159 160 if not found:
161 + for i in range(repetitions, 0, -1):
162 + wordOrig = wordOrig.replace('-', ' ', 1)
163 + print("Word: {}".format(wordOrig))
164 + if wordOrig in hashTermsOrig[termTag]:
165 + print("WordOrig: {}".format(wordOrig))
166 + found = True
167 + line = ''
168 + for w, l in zip(word.split('-'), lemma.split('-')):
169 + line += w + '\t' + listLine1[
170 + 1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
171 + line = line.rstrip('\n')
172 + break
173 + if not found:
174 + for i in range(0, repetitions):
175 + wordOrig = word.replace('-', ' ', 1)
176 + print("Word: {}".format(wordOrig))
177 + if wordOrig in hashTermsOrig[termTag]:
178 + print("WordOrig: {}".format(wordOrig))
179 + found = True
180 + line = ''
181 + for w, l in zip(word.split('-'), lemma.split('-')):
182 + line += w + '\t' + listLine1[
183 + 1] + '\t' + l + ' ' + termTag + ' TermTag' + '\n'
184 + line = line.rstrip('\n')
185 + break
186 + if not found:
160 187 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
161 188 else:
162 189 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
......