Carlos-Francisco Méndez-Cruz

Training and testing binding thrombin dataset

......@@ -44,6 +44,7 @@ __author__ = 'CMendezC'
# --classifier SVM
# source activate python3
# python training-validation-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset/models --outputModelFile SVM-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset/reports --outputReportFile SVM.txt --classifier SVM
###########################################################
# MAIN PROGRAM #
......@@ -72,22 +73,19 @@ if __name__ == "__main__":
help="Classifier", metavar="NAME",
choices=('BernoulliNB', 'SVM', 'NearestCentroid'), default='SVM')
# Parse the command line. parse_args() is used in its single-return form and
# every value is read by attribute from the returned object, so the earlier
# `(options, args)` unpacking and its manual check for stray positional
# arguments are not repeated here.
# NOTE(review): assumes `parser` is an argparse.ArgumentParser (it is created
# outside this excerpt) - confirm against the import section.
args = parser.parse_args()

# Printing parameter values
print('-------------------------------- PARAMETERS --------------------------------')
print("Path to read input files: " + str(args.inputPath))
print("File to read training data: " + str(args.inputTrainingData))
print("File to read testing data: " + str(args.inputTestingData))
print("File to read testing classes: " + str(args.inputTestingClasses))
print("Path to place output model: " + str(args.outputModelPath))
print("File to place output model: " + str(args.outputModelFile))
print("Path to place evaluation report: " + str(args.outputReportPath))
print("File to place evaluation report: " + str(args.outputReportFile))
# Fix: report the selected classifier. This line used to read `outputFile`,
# which is not one of the options listed in the usage comment.
print("Classifier: " + str(args.classifier))

# Start time
t0 = time()
......@@ -95,7 +93,7 @@ if __name__ == "__main__":
# Load the training set: the file named by inputTrainingData under inputPath
# is opened as UTF-8 text and read one line at a time.
# NOTE(review): the two consecutive `with open(...)` lines below are the same
# statement before and after the options -> args rename shown in this diff;
# they are not meant to run back to back.
print(" Reading training data and true classes...")
trainingClasses = []
trainingData = []
with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \
with open(os.path.join(args.inputPath, args.inputTrainingData), encoding='utf8', mode='r') \
as iFile:
for line in iFile:
# Remove carriage-return/newline characters from both ends of the line.
# The remainder of the loop body lies outside this excerpt.
line = line.strip('\r\n')
......@@ -113,14 +111,14 @@ if __name__ == "__main__":
# Load the testing set and its true classes from two files under inputPath.
# NOTE(review): each pair of consecutive `with open(...)` lines below is one
# statement shown before and after the options -> args rename in this diff;
# only one line of each pair belongs in the script.
print(" Reading testing data and true classes...")
testingClasses = []
testingData = []
with open(os.path.join(options.inputPath, options.inputTestingData), encoding='utf8', mode='r') \
with open(os.path.join(args.inputPath, args.inputTestingData), encoding='utf8', mode='r') \
as iFile:
for line in iFile:
# Each line is cleaned of CR/LF at both ends and split on commas; the
# resulting list of fields is stored as one row of testingData.
line = line.strip('\r\n')
listLine = line.split(',')
testingData.append(listLine)
# Convert the collected rows into a matrix with dtype 'double'.
# NOTE(review): presumably scipy.sparse.csr_matrix - confirm against the imports.
testingMatrix = csr_matrix(testingData, dtype='double')
with open(os.path.join(options.inputPath, options.inputTestingClasses), encoding='utf8', mode='r') \
with open(os.path.join(args.inputPath, args.inputTestingClasses), encoding='utf8', mode='r') \
as iFile:
for line in iFile:
# Remove CR/LF from both ends of the line; the remainder of this loop body
# lies outside this excerpt.
line = line.strip('\r\n')
......@@ -131,11 +129,11 @@ if __name__ == "__main__":
# Summary of the testing set.
# Fix: the class-I count is taken from testingClasses; it previously counted
# trainingClasses under a "testing" label.
print("Number of testing class I: {}".format(testingClasses.count('I')))
print("Shape of testing matrix: {}".format(testingMatrix.shape))

# Instantiate the estimator selected with --classifier.
# Fix: the first branch used to test for "MultinomialNB", a value the option's
# declared choices ('BernoulliNB', 'SVM', 'NearestCentroid') never produce, so
# choosing BernoulliNB left `classifier` unbound.
if args.classifier == "BernoulliNB":
    classifier = BernoulliNB()
elif args.classifier == "SVM":
    classifier = SVC()
elif args.classifier == "NearestCentroid":
    classifier = NearestCentroid()
else:
    # Not reachable while the branches above match the option's choices; kept
    # so a future mismatch fails here with a clear message instead of a
    # NameError further down.
    raise ValueError("Unsupported classifier: {}".format(args.classifier))

print(" Training...")
......@@ -147,9 +145,9 @@ if __name__ == "__main__":
print(" Done!")
print(" Saving report...")
# Fix: the report location comes from --outputReportPath/--outputReportFile,
# the options named in the usage comment and echoed in the parameter listing.
# The previous `outputPath`/`outputFile` attributes match no option there.
reportFilePath = os.path.join(args.outputReportPath, args.outputReportFile)
with open(reportFilePath, mode='w', encoding='utf8') as oFile:
    # testingClasses holds the true labels and y_pred the predictions; both
    # are produced by code outside this excerpt.
    oFile.write('********** EVALUATION REPORT **********\n')
    oFile.write('Classifier: {}\n'.format(args.classifier))
    oFile.write('Accuracy: {}\n'.format(accuracy_score(testingClasses, y_pred)))
    oFile.write('Precision: {}\n'.format(precision_score(testingClasses, y_pred, average='weighted')))
    oFile.write('Recall: {}\n'.format(recall_score(testingClasses, y_pred, average='weighted')))
......