upload

Estefani Gaytan Nunez
Commit 87ac8726273cd56e95224f55c1678a8140496d50 87ac8726 1 parent 7b9766f4
Showing 4 changed files with 9 additions and 10 deletions
predict-annot/bin/tagging/tagging.py
predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
predict-annot/output/annot-input_bg_outputI_v4.txt_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
predict-annot/reports/output_tagging_report_v4.txt
--- a/predict-annot/bin/tagging/tagging.py
View file @87ac872
+++ b/predict-annot/bin/tagging/tagging.py
View file @87ac872
@@ -193,7 +193,7 @@ if __name__ == "__main__":
                         else:                             
                             outputLine = line.split(' ')[0]
                         #print(outputLine + '\t' + ', '.join(Ltags))
-                        sentencesOutputDataI.append([outputLine, ', '.join(Ltags)])
+                        sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + ', '.join(Ltags))
                         sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag)
                         continue
@@ -236,7 +236,7 @@ if __name__ == "__main__":
                         if sb:
                             sentence+= word.split('|')[0] + ' '
                     #print(outputLine + '\t' + ', '.join(Ltags))                  
-                    sentencesOutputDataI.append([outputLine, ', '.join(Ltags)])
+                    sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ ', '.join(Ltags))
                     lidx += 1
             #print( DF(sentencesOutputDataI) )
@@ -246,5 +246,10 @@ if __name__ == "__main__":
                 for line in sentencesOutputDataII:
                     #print(line)
                     oFile.write(line + '\n')
+            with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI:
+                for line in sentencesOutputDataI:
+                    if re.findall('</', line):
+                        print(line)
+                    #oFileI.write(line + '\n')
     print("Processing corpus done in: %fs" % (time() - t0))
--- a/predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
View file @87ac872
+++ b/predict-annot/output/annot-input_bg_outputII_v4_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv
View file @87ac872
--- a/predict-annot/output/annot-input_bg_outputI_v4.txt_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv 0 → 100644
View file @87ac872
+++ b/predict-annot/output/annot-input_bg_outputI_v4.txt_model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10.tsv 0 → 100644
View file @87ac872
--- a/predict-annot/reports/output_tagging_report_v4.txt
View file @87ac872
+++ b/predict-annot/reports/output_tagging_report_v4.txt
View file @87ac872
@@ -17,16 +17,10 @@
 Filtering symbols ['.', ',', ':', ';', '?', '!', "'", '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']: False
 -------------------------------- PROCESSING --------------------------------
 Reading CRF model...
-Reading CRF model done in: 0.009649s
+Reading CRF model done in: 0.009804s
 Processing corpus...
 Preprocessing file...annot-input_bg_v3.txt
 Sentences input data: 14716
 Predicting tags with model
-Prediction done in: 1.757922s
+Prediction done in: 1.811103s
 Tagging file
-Preprocessing file...annot-input_bg_v4.txt
-Sentences input data: 90904
-Predicting tags with model
-Prediction done in: 26.068446s
-Tagging file
-Processing corpus done in: 58.020686s