Estefani Gaytan Nunez

upload

......@@ -193,7 +193,7 @@ if __name__ == "__main__":
else:
outputLine = line.split(' ')[0]
#print(outputLine + '\t' + ', '.join(Ltags))
sentencesOutputDataI.append([outputLine, ', '.join(Ltags)])
sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + ', '.join(Ltags))
sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag)
continue
......@@ -236,7 +236,7 @@ if __name__ == "__main__":
if sb:
sentence+= word.split('|')[0] + ' '
#print(outputLine + '\t' + ', '.join(Ltags))
sentencesOutputDataI.append([outputLine, ', '.join(Ltags)])
sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ ', '.join(Ltags))
lidx += 1
#print( DF(sentencesOutputDataI) )
......@@ -246,5 +246,10 @@ if __name__ == "__main__":
for line in sentencesOutputDataII:
#print(line)
oFile.write(line + '\n')
with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI:
for line in sentencesOutputDataI:
if re.findall('</', line):
print(line)
#oFileI.write(line + '\n')
print("Processing corpus done in: %fs" % (time() - t0))
......
......@@ -17,16 +17,10 @@
Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
-------------------------------- PROCESSING --------------------------------
Reading CRF model...
Reading CRF model done in: 0.009649s
Reading CRF model done in: 0.009804s
Processing corpus...
Preprocessing file...annot-input_bg_v3.txt
Sentences input data: 14716
Predicting tags with model
Prediction done in: 1.757922s
Prediction done in: 1.811103s
Tagging file
Preprocessing file...annot-input_bg_v4.txt
Sentences input data: 90904
Predicting tags with model
Prediction done in: 26.068446s
Tagging file
Processing corpus done in: 58.020686s
......