Carlos-Francisco Méndez-Cruz

Training and testing on the binding-thrombin dataset

...@@ -44,6 +44,7 @@ __author__ = 'CMendezC' ...@@ -44,6 +44,7 @@ __author__ = 'CMendezC'
44 # --classifier SVM 44 # --classifier SVM
45 45
46 # source activate python3 46 # source activate python3
47 +# python training-validation-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset/models --outputModelFile SVM-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset/reports --outputReportFile SVM.txt --classifier SVM
47 48
48 ########################################################### 49 ###########################################################
49 # MAIN PROGRAM # 50 # MAIN PROGRAM #
...@@ -72,22 +73,19 @@ if __name__ == "__main__": ...@@ -72,22 +73,19 @@ if __name__ == "__main__":
72 help="Classifier", metavar="NAME", 73 help="Classifier", metavar="NAME",
73 choices=('BernoulliNB', 'SVM', 'NearestCentroid'), default='SVM') 74 choices=('BernoulliNB', 'SVM', 'NearestCentroid'), default='SVM')
74 75
75 - (options, args) = parser.parse_args() 76 + args = parser.parse_args()
76 - if len(args) > 0:
77 - parser.error("None parameters indicated.")
78 - sys.exit(1)
79 77
80 # Printing parameter values 78 # Printing parameter values
81 print('-------------------------------- PARAMETERS --------------------------------') 79 print('-------------------------------- PARAMETERS --------------------------------')
82 - print("Path to read input files: " + str(options.inputPath)) 80 + print("Path to read input files: " + str(args.inputPath))
83 - print("File to read training data: " + str(options.inputTrainingData)) 81 + print("File to read training data: " + str(args.inputTrainingData))
84 - print("File to read testing data: " + str(options.inputTestingData)) 82 + print("File to read testing data: " + str(args.inputTestingData))
85 - print("File to read testing classes: " + str(options.inputTestingClasses)) 83 + print("File to read testing classes: " + str(args.inputTestingClasses))
86 - print("Path to place output model: " + str(options.outputModelPath)) 84 + print("Path to place output model: " + str(args.outputModelPath))
87 - print("File to place output model: " + str(options.outputModelFile)) 85 + print("File to place output model: " + str(args.outputModelFile))
88 - print("Path to place evaluation report: " + str(options.outputReportPath)) 86 + print("Path to place evaluation report: " + str(args.outputReportPath))
89 - print("File to place evaluation report: " + str(options.outputReportFile)) 87 + print("File to place evaluation report: " + str(args.outputReportFile))
90 - print("Classifier: " + str(options.outputFile)) 88 + print("Classifier: " + str(args.outputFile))
91 89
92 # Start time 90 # Start time
93 t0 = time() 91 t0 = time()
...@@ -95,7 +93,7 @@ if __name__ == "__main__": ...@@ -95,7 +93,7 @@ if __name__ == "__main__":
95 print(" Reading training data and true classes...") 93 print(" Reading training data and true classes...")
96 trainingClasses = [] 94 trainingClasses = []
97 trainingData = [] 95 trainingData = []
98 - with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \ 96 + with open(os.path.join(args.inputPath, args.inputTrainingData), encoding='utf8', mode='r') \
99 as iFile: 97 as iFile:
100 for line in iFile: 98 for line in iFile:
101 line = line.strip('\r\n') 99 line = line.strip('\r\n')
...@@ -113,14 +111,14 @@ if __name__ == "__main__": ...@@ -113,14 +111,14 @@ if __name__ == "__main__":
113 print(" Reading testing data and true classes...") 111 print(" Reading testing data and true classes...")
114 testingClasses = [] 112 testingClasses = []
115 testingData = [] 113 testingData = []
116 - with open(os.path.join(options.inputPath, options.inputTestingData), encoding='utf8', mode='r') \ 114 + with open(os.path.join(args.inputPath, args.inputTestingData), encoding='utf8', mode='r') \
117 as iFile: 115 as iFile:
118 for line in iFile: 116 for line in iFile:
119 line = line.strip('\r\n') 117 line = line.strip('\r\n')
120 listLine = line.split(',') 118 listLine = line.split(',')
121 testingData.append(listLine) 119 testingData.append(listLine)
122 testingMatrix = csr_matrix(testingData, dtype='double') 120 testingMatrix = csr_matrix(testingData, dtype='double')
123 - with open(os.path.join(options.inputPath, options.inputTestingClasses), encoding='utf8', mode='r') \ 121 + with open(os.path.join(args.inputPath, args.inputTestingClasses), encoding='utf8', mode='r') \
124 as iFile: 122 as iFile:
125 for line in iFile: 123 for line in iFile:
126 line = line.strip('\r\n') 124 line = line.strip('\r\n')
...@@ -131,11 +129,11 @@ if __name__ == "__main__": ...@@ -131,11 +129,11 @@ if __name__ == "__main__":
131 print("Number of testing class I: {}".format(trainingClasses.count('I'))) 129 print("Number of testing class I: {}".format(trainingClasses.count('I')))
132 print("Shape of testing matrix: {}".format(testingMatrix.shape)) 130 print("Shape of testing matrix: {}".format(testingMatrix.shape))
133 131
134 - if options.classifier == "MultinomialNB": 132 + if args.classifier == "MultinomialNB":
135 classifier = BernoulliNB() 133 classifier = BernoulliNB()
136 - elif options.classifier == "SVM": 134 + elif args.classifier == "SVM":
137 classifier = SVC() 135 classifier = SVC()
138 - elif options.classifier == "NearestCentroid": 136 + elif args.classifier == "NearestCentroid":
139 classifier = NearestCentroid() 137 classifier = NearestCentroid()
140 138
141 print(" Training...") 139 print(" Training...")
...@@ -147,9 +145,9 @@ if __name__ == "__main__": ...@@ -147,9 +145,9 @@ if __name__ == "__main__":
147 print(" Done!") 145 print(" Done!")
148 146
149 print(" Saving report...") 147 print(" Saving report...")
150 - with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile: 148 + with open(os.path.join(args.outputPath, args.outputFile), mode='w', encoding='utf8') as oFile:
151 oFile.write('********** EVALUATION REPORT **********\n') 149 oFile.write('********** EVALUATION REPORT **********\n')
152 - oFile.write('Classifier: {}\n'.format(options.classifier)) 150 + oFile.write('Classifier: {}\n'.format(args.classifier))
153 oFile.write('Accuracy: {}\n'.format(accuracy_score(testingClasses, y_pred))) 151 oFile.write('Accuracy: {}\n'.format(accuracy_score(testingClasses, y_pred)))
154 oFile.write('Precision: {}\n'.format(precision_score(testingClasses, y_pred, average='weighted'))) 152 oFile.write('Precision: {}\n'.format(precision_score(testingClasses, y_pred, average='weighted')))
155 oFile.write('Recall: {}\n'.format(recall_score(testingClasses, y_pred, average='weighted'))) 153 oFile.write('Recall: {}\n'.format(recall_score(testingClasses, y_pred, average='weighted')))
......