Carlos-Francisco Méndez-Cruz

Classification of transcription factor structural domain sentences

...@@ -47,10 +47,10 @@ from scipy.sparse import csr_matrix ...@@ -47,10 +47,10 @@ from scipy.sparse import csr_matrix
47 # source activate python3 47 # source activate python3
48 # python training-crossvalidation-testing-dom-v1.py 48 # python training-crossvalidation-testing-dom-v1.py
49 # --inputPath /home/text-dom-dataset 49 # --inputPath /home/text-dom-dataset
50 -# --inputTrainingData trainData.txt 50 +# --inputTrainingData train-data.txt
51 -# --inputTrainingClasses trainClasses.txt 51 +# --inputTrainingClasses train-classes.txt
52 -# --inputTestingData testData.txt 52 +# --inputTestingData test-data.txt
53 -# --inputTestingClasses testClasses.txt 53 +# --inputTestingClasses test-classes.txt
54 # --outputModelPath /home/text-dom-dataset/models 54 # --outputModelPath /home/text-dom-dataset/models
55 # --outputModelFile SVM-lineal-model.mod 55 # --outputModelFile SVM-lineal-model.mod
56 # --outputReportPath /home/text-dom-dataset/reports 56 # --outputReportPath /home/text-dom-dataset/reports
...@@ -59,8 +59,10 @@ from scipy.sparse import csr_matrix ...@@ -59,8 +59,10 @@ from scipy.sparse import csr_matrix
59 # --saveData 59 # --saveData
60 # --kernel linear 60 # --kernel linear
61 # --vectorizer b 61 # --vectorizer b
62 -# --ngrinitial 2 62 +# --ngrinitial 1
63 -# --ngrfinal 2 63 +# --ngrfinal 1
64 +
65 +# python training-crossvalidation-testing-dom-v1.py --inputPath /home/laigen-supervised-learning/text-dom-dataset --inputTrainingData train-data.txt --inputTrainingClasses train-classes.txt --inputTestingData test-data.txt --inputTestingClasses test-classes.txt --outputModelPath /home/laigen-supervised-learning/text-dom-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/laigen-supervised-learning/text-dom-dataset/reports --outputReportFile SVM-linear.txt --classifier SVM --saveData --kernel linear --vectorizer b --ngrinitial 1 --ngrfinal 1
64 66
65 ########################################################### 67 ###########################################################
66 # MAIN PROGRAM # 68 # MAIN PROGRAM #
...@@ -284,6 +286,8 @@ if __name__ == "__main__": ...@@ -284,6 +286,8 @@ if __name__ == "__main__":
284 X_test = reduc.transform(X_test) 286 X_test = reduc.transform(X_test)
285 y_pred = myClassifier.predict(X_test) 287 y_pred = myClassifier.predict(X_test)
286 best_parameters = myClassifier.best_estimator_.get_params() 288 best_parameters = myClassifier.best_estimator_.get_params()
289 + if args.classifier == "SVM":
290 + confidence_scores = classifier.decision_function(X_test)
287 print(" Done!") 291 print(" Done!")
288 292
289 print("Saving report...") 293 print("Saving report...")
...@@ -304,6 +308,13 @@ if __name__ == "__main__": ...@@ -304,6 +308,13 @@ if __name__ == "__main__":
304 oFile.write('Best parameters: \n') 308 oFile.write('Best parameters: \n')
305 for param in sorted(best_parameters.keys()): 309 for param in sorted(best_parameters.keys()):
306 oFile.write("\t%s: %r\n" % (param, best_parameters[param])) 310 oFile.write("\t%s: %r\n" % (param, best_parameters[param]))
311 + if args.classifier == "SVM":
312 + oFile.write('\nWeights assigned to the features: \n')
313 + oFile.write("{}\n".format(classifier.coef_))
314 + oFile.write('Confidence scores: \n')
315 + oFile.write("{}\n".format(confidence_scores))
316 + oFile.write('Number of support vectors per class: \n{}\n'.format(classifier.n_support_))
317 + oFile.write('Support vectors: \n{}\n'.format(classifier.support_vectors_))
307 318
308 print(" Done!") 319 print(" Done!")
309 320
......