Estefani Gaytan Nunez

upload

...@@ -193,7 +193,7 @@ if __name__ == "__main__": ...@@ -193,7 +193,7 @@ if __name__ == "__main__":
193 else: 193 else:
194 outputLine = line.split(' ')[0] 194 outputLine = line.split(' ')[0]
195 #print(outputLine + '\t' + ', '.join(Ltags)) 195 #print(outputLine + '\t' + ', '.join(Ltags))
196 - sentencesOutputDataI.append([outputLine, ', '.join(Ltags)]) 196 + sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + ', '.join(Ltags))
197 sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag) 197 sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag)
198 continue 198 continue
199 199
...@@ -236,7 +236,7 @@ if __name__ == "__main__": ...@@ -236,7 +236,7 @@ if __name__ == "__main__":
236 if sb: 236 if sb:
237 sentence+= word.split('|')[0] + ' ' 237 sentence+= word.split('|')[0] + ' '
238 #print(outputLine + '\t' + ', '.join(Ltags)) 238 #print(outputLine + '\t' + ', '.join(Ltags))
239 - sentencesOutputDataI.append([outputLine, ', '.join(Ltags)]) 239 + sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ ', '.join(Ltags))
240 lidx += 1 240 lidx += 1
241 241
242 #print( DF(sentencesOutputDataI) ) 242 #print( DF(sentencesOutputDataI) )
...@@ -246,5 +246,10 @@ if __name__ == "__main__": ...@@ -246,5 +246,10 @@ if __name__ == "__main__":
246 for line in sentencesOutputDataII: 246 for line in sentencesOutputDataII:
247 #print(line) 247 #print(line)
248 oFile.write(line + '\n') 248 oFile.write(line + '\n')
249 + with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI:
250 + for line in sentencesOutputDataI:
251 + if re.findall('</', line):
252 + print(line)
253 + #oFileI.write(line + '\n')
249 254
250 print("Processing corpus done in: %fs" % (time() - t0)) 255 print("Processing corpus done in: %fs" % (time() - t0))
......
...@@ -17,16 +17,10 @@ ...@@ -17,16 +17,10 @@
17 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False 17 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
18 -------------------------------- PROCESSING -------------------------------- 18 -------------------------------- PROCESSING --------------------------------
19 Reading CRF model... 19 Reading CRF model...
20 -Reading CRF model done in: 0.009649s 20 +Reading CRF model done in: 0.009804s
21 Processing corpus... 21 Processing corpus...
22 Preprocessing file...annot-input_bg_v3.txt 22 Preprocessing file...annot-input_bg_v3.txt
23 Sentences input data: 14716 23 Sentences input data: 14716
24 Predicting tags with model 24 Predicting tags with model
25 -Prediction done in: 1.757922s 25 +Prediction done in: 1.811103s
26 Tagging file 26 Tagging file
27 -Preprocessing file...annot-input_bg_v4.txt
28 -Sentences input data: 90904
29 -Predicting tags with model
30 -Prediction done in: 26.068446s
31 -Tagging file
32 -Processing corpus done in: 58.020686s
......