Showing 6 changed files with 9 additions and 4 deletions
@@ -138,7 +138,8 @@ if __name__ == "__main__":
 sentencesOutputDataI = []
 # Preprocessing input sentences
 with open(os.path.join(options.inputPath, file), "r") as iFile:
-sentencesInputData = [ line.strip('\n').split() for line in iFile]
+lines = iFile.readlines()
+sentencesInputData = [ line.strip('\n').split() for line in lines]
 # Save input sentences
 X_input = [training.sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesInputData]
 print("Sentences input data: " + str(len(sentencesInputData)))
@@ -148,12 +149,13 @@ if __name__ == "__main__":
 print("Predicting tags with model...")
 y_pred = crf.predict(X_input)
 
+#print(y_pred)
 print("Prediction done in: %fs" % (time() - t1))
 
 ########################################### Tagging with CRF model ###########################################
 print("Tagging file...")
 lidx = 0
-for line, tagLine in zip(iFile.readlines(), y_pred):
+for line, tagLine in zip(lines, y_pred):
 # unique tags
 Ltags = set(labels).intersection(set(tagLine))
 # Skip untagged sentence
@@ -178,6 +180,7 @@ if __name__ == "__main__":
 else:
 outputLine = line.split(' ')[0]
 # Saving Sentence Ouput I
+print(outputLine)
 sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags))
 # Increase sentence counter
 lidx += 1
@@ -212,8 +215,10 @@ if __name__ == "__main__":
 outputLine += word.split('|')[0] + ' '
 i += 1
 # Saving Sentence Ouput I
+print(outputLine)
 sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags))
 lidx += 1
+
 print("\n".join(sentencesOutputDataI[1:3]))
 ########################################### Save Output I ##########################################
 print("Saving Ouput I...")
@@ -239,7 +244,7 @@ if __name__ == "__main__":
 ########################################### Save Output III ##########################################
 print("Saving Ouput III...")
 with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII:
-for line, tagLine in zip(iFile.readlines(), y_pred):
+for line, tagLine in zip(lines, y_pred):
 oline = [ w.split('|')[0].replace('LDR','(').replace('LDR','(')+'|'+tag for w,tag in zip(line.split(' '), tagLine)]
 
 oFileIII.write(' '.join(oline) + '\n')
@@ -249,4 +254,4 @@ if __name__ == "__main__":
 # from https://stackoverflow.com/questions/7100125/storing-python-dictionaries
 with open(os.path.join(options.outputPath, 'crf_probs.json'), 'w') as fp:
 json.dump(y_probs, fp)
-print("Processing corpus done in: %fs" % (time() - t0))
+print("Pssing corpus done in: %fs" % (time() - t0))
Four additional diffs could not be displayed because they are too large.
One file mode changed.
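For context on the main change in the displayed diff: the original script called iFile.readlines() in loops that run after the file iterator had already been consumed (and, once the with block exits, closed), so those zip(...) passes had nothing to pair with y_pred. The commit reads the lines once into a list named lines and reuses that list everywhere. Below is a minimal standalone sketch of that pattern, not the repository's code; the temporary sample file and the stand-in predictions are hypothetical.

import os
import tempfile

# Build a tiny throwaway input file for the demonstration (hypothetical data).
path = os.path.join(tempfile.mkdtemp(), "sample.txt")
with open(path, "w") as f:
    f.write("the protein regulates the gene\nthe gene is transcribed\n")

# Broken pattern: a file object is a one-shot iterator, so a second
# readlines() on the same handle returns an empty list.
with open(path, "r") as iFile:
    first_pass = [line.strip('\n').split() for line in iFile]
    second_pass = iFile.readlines()            # already exhausted -> []
print(len(first_pass), len(second_pass))       # prints: 2 0

# Fixed pattern (what the commit does): read once, reuse the list.
with open(path, "r") as iFile:
    lines = iFile.readlines()
    sentences = [line.strip('\n').split() for line in lines]

y_pred = [["O"] * len(s) for s in sentences]   # stand-in for crf.predict(X_input)
for line, tagLine in zip(lines, y_pred):       # every sentence now gets its tags
    print(line.strip(), "->", tagLine)

Holding the whole file in memory is reasonable here, since the script already builds per-sentence feature lists for the entire input before predicting.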