Carlos-Francisco Méndez-Cruz

New terminological tagging for CRFs

...@@ -108,7 +108,7 @@ if __name__ == "__main__": ...@@ -108,7 +108,7 @@ if __name__ == "__main__":
108 hashTermsOrig[key].append(line.capitalize()) 108 hashTermsOrig[key].append(line.capitalize())
109 print(' Terms read {} size: {}'.format(key, len(hashTerms[key]))) 109 print(' Terms read {} size: {}'.format(key, len(hashTerms[key])))
110 print(' Terms read {} size: {}'.format(key, len(hashTermsOrig[key]))) 110 print(' Terms read {} size: {}'.format(key, len(hashTermsOrig[key])))
111 - print("hashTermsOrig: {}".format(hashTermsOrig)) 111 + #print("hashTermsOrig: {}".format(hashTermsOrig))
112 112
113 #regularWords = words.words('en') 113 #regularWords = words.words('en')
114 print() 114 print()
...@@ -164,7 +164,10 @@ if __name__ == "__main__": ...@@ -164,7 +164,10 @@ if __name__ == "__main__":
164 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag' 164 line = listLine1[0] + '\t' + listLine1[1] + '\t' + listLine2[0] + ' ' + 'O' + ' TermTag'
165 # line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag' 165 # line = listLine1[0] + '\t' + termTag + '\t' + listLine2[0] + ' ' + termTag + ' TermTag'
166 #line = word + '\t' + termTag + '\t' + lemma + ' ' + termTag + ' TermTag' 166 #line = word + '\t' + termTag + '\t' + lemma + ' ' + termTag + ' TermTag'
167 - oFile.write(line + '\n') 167 + if found:
168 + oFile.write(line)
169 + else:
170 + oFile.write(line + '\n')
168 filesPreprocessed += 1 171 filesPreprocessed += 1
169 172
170 # Imprime archivos procesados 173 # Imprime archivos procesados
......