Carlos-Francisco Méndez-Cruz

Classification of transcription factor structural domain sentences

......@@ -47,10 +47,10 @@ from scipy.sparse import csr_matrix
# source activate python3
# python training-crossvalidation-testing-dom-v1.py
# --inputPath /home/text-dom-dataset
# --inputTrainingData trainData.txt
# --inputTrainingClasses trainClasses.txt
# --inputTestingData testData.txt
# --inputTestingClasses testClasses.txt
# --inputTrainingData train-data.txt
# --inputTrainingClasses train-classes.txt
# --inputTestingData test-data.txt
# --inputTestingClasses test-classes.txt
# --outputModelPath /home/text-dom-dataset/models
# --outputModelFile SVM-lineal-model.mod
# --outputReportPath /home/text-dom-dataset/reports
......@@ -59,8 +59,10 @@ from scipy.sparse import csr_matrix
# --saveData
# --kernel linear
# --vectorizer b
# --ngrinitial 2
# --ngrfinal 2
# --ngrinitial 1
# --ngrfinal 1
# python training-crossvalidation-testing-dom-v1.py --inputPath /home/laigen-supervised-learning/text-dom-dataset --inputTrainingData train-data.txt --inputTrainingClasses train-classes.txt --inputTestingData test-data.txt --inputTestingClasses test-classes.txt --outputModelPath /home/laigen-supervised-learning/text-dom-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/laigen-supervised-learning/text-dom-dataset/reports --outputReportFile SVM-linear.txt --classifier SVM --saveData --kernel linear --vectorizer b --ngrinitial 1 --ngrfinal 1
###########################################################
# MAIN PROGRAM #
......@@ -284,6 +286,8 @@ if __name__ == "__main__":
# Evaluation step: project the test matrix with `reduc`, predict its labels,
# and collect the values that the report section writes out afterwards.
X_test = reduc.transform(X_test)
y_pred = myClassifier.predict(X_test)
# `myClassifier` exposes `best_estimator_`, so it is presumably a
# GridSearchCV-style search object -- TODO confirm against its construction.
best_parameters = myClassifier.best_estimator_.get_params()
# For SVM runs, additionally keep the decision-function values of the test set.
if args.classifier == "SVM":
# NOTE(review): this uses `classifier`, whereas prediction above uses
# `myClassifier`. If `classifier` is the unfitted estimator handed to the
# search object, this call will fail; `myClassifier.best_estimator_` may be
# the intended receiver -- verify where `classifier` is defined and fitted.
confidence_scores = classifier.decision_function(X_test)
print(" Done!")
print("Saving report...")
......@@ -304,6 +308,13 @@ if __name__ == "__main__":
# Report section: write every entry of `best_parameters`, one per line,
# ordered by parameter name and formatted as "<tab>name: repr(value)".
oFile.write('Best parameters: \n')
for param in sorted(best_parameters.keys()):
oFile.write("\t%s: %r\n" % (param, best_parameters[param]))
# SVM-only diagnostics: feature weights, decision-function values, and
# support-vector information read from `classifier`.
if args.classifier == "SVM":
# NOTE(review): `coef_` is presumably only available for a linear kernel in
# scikit-learn's SVC -- confirm that other --kernel values cannot reach here.
# NOTE(review): these attributes are read from `classifier`; verify that it
# is the fitted model (not the template given to the parameter search).
oFile.write('\nWeights assigned to the features: \n')
oFile.write("{}\n".format(classifier.coef_))
oFile.write('Confidence scores: \n')
oFile.write("{}\n".format(confidence_scores))
# NOTE(review): formatting large arrays as text may abbreviate them with
# "..." -- confirm the report is expected to hold the full values.
oFile.write('Number of support vectors per class: \n{}\n'.format(classifier.n_support_))
oFile.write('Support vectors: \n{}\n'.format(classifier.support_vectors_))
print(" Done!")
......