Carlos-Francisco Méndez-Cruz

Training, cross-validation and testing on the structural domain dataset

...@@ -36,6 +36,7 @@ __author__ = 'CMendezC' ...@@ -36,6 +36,7 @@ __author__ = 'CMendezC'
36 # 11) --kernel Kernel 36 # 11) --kernel Kernel
37 # 12) --reduction Feature selection or dimensionality reduction 37 # 12) --reduction Feature selection or dimensionality reduction
38 # 13) --removeStopWords Remove most frequent words 38 # 13) --removeStopWords Remove most frequent words
39 +# 14) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf.
39 40
40 41
41 # Output: 42 # Output:
...@@ -43,22 +44,6 @@ __author__ = 'CMendezC' ...@@ -43,22 +44,6 @@ __author__ = 'CMendezC'
43 44
44 # Execution: 45 # Execution:
45 46
46 -# python training-crossvalidation-testing-dom.py
47 -# --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset
48 -# --inputTrainingData trainData.txt
49 -# --inputTrainingClasses trainClasses.txt
50 -# --inputTestingData testData.txt
51 -# --inputTestingClasses testClasses.txt
52 -# --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models
53 -# --outputModelFile SVM-lineal-model.mod
54 -# --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports
55 -# --outputReportFile SVM-lineal.txt
56 -# --classifier SVM
57 -# --saveData
58 -# --kernel linear
59 -# --reduction SVD200
60 -# --removeStopWords
61 -
62 # source activate python3 47 # source activate python3
63 # python training-crossvalidation-testing-dom.py 48 # python training-crossvalidation-testing-dom.py
64 # --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset 49 # --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset
...@@ -75,7 +60,8 @@ __author__ = 'CMendezC' ...@@ -75,7 +60,8 @@ __author__ = 'CMendezC'
75 # --kernel linear 60 # --kernel linear
76 # --reduction SVD200 61 # --reduction SVD200
77 # --removeStopWords 62 # --removeStopWords
78 -# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear 63 +# --vectorizer b
64 +# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear --saveData --vectorizer b
79 # --reduction SVD200 65 # --reduction SVD200
80 # --removeStopWords 66 # --removeStopWords
81 67
...@@ -124,6 +110,9 @@ if __name__ == "__main__": ...@@ -124,6 +110,9 @@ if __name__ == "__main__":
124 parser.add_argument("--ngrfinal", type=int, 110 parser.add_argument("--ngrfinal", type=int,
125 dest="ngrfinal", default=1, 111 dest="ngrfinal", default=1,
126 help="Final n-gram", metavar="INTEGER") 112 help="Final n-gram", metavar="INTEGER")
113 + parser.add_argument("--vectorizer", dest="vectorizer", required=True,
114 + help="Vectorizer: b=binary, f=frequency, t=tf-idf", metavar="CHAR",
115 + choices=('b', 'f', 't'), default='b')
127 116
128 args = parser.parse_args() 117 args = parser.parse_args()
129 118
...@@ -145,6 +134,7 @@ if __name__ == "__main__": ...@@ -145,6 +134,7 @@ if __name__ == "__main__":
145 print("Remove stop words: " + str(args.removeStopWords)) 134 print("Remove stop words: " + str(args.removeStopWords))
146 print("Initial ngram: " + str(args.ngrinitial)) 135 print("Initial ngram: " + str(args.ngrinitial))
147 print("Final ngram: " + str(args.ngrfinal)) 136 print("Final ngram: " + str(args.ngrfinal))
137 + print("Vectorizer: " + str(args.vectorizer))
148 138
149 # Start time 139 # Start time
150 t0 = time() 140 t0 = time()
......