Training, crossvalidation and testing structural domain dataset
Showing
1 changed file
with
7 additions
and
17 deletions
... | @@ -36,6 +36,7 @@ __author__ = 'CMendezC' | ... | @@ -36,6 +36,7 @@ __author__ = 'CMendezC' |
36 | # 11) --kernel Kernel | 36 | # 11) --kernel Kernel |
37 | # 12) --reduction Feature selection or dimensionality reduction | 37 | # 12) --reduction Feature selection or dimensionality reduction |
38 | # 13) --removeStopWords Remove most frequent words | 38 | # 13) --removeStopWords Remove most frequent words |
39 | +# 14) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf. | ||
39 | 40 | ||
40 | 41 | ||
41 | # Output: | 42 | # Output: |
... | @@ -43,22 +44,6 @@ __author__ = 'CMendezC' | ... | @@ -43,22 +44,6 @@ __author__ = 'CMendezC' |
43 | 44 | ||
44 | # Execution: | 45 | # Execution: |
45 | 46 | ||
46 | -# python training-crossvalidation-testing-dom.py | ||
47 | -# --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset | ||
48 | -# --inputTrainingData trainData.txt | ||
49 | -# --inputTrainingClasses trainClasses.txt | ||
50 | -# --inputTestingData testData.txt | ||
51 | -# --inputTestingClasses testClasses.txt | ||
52 | -# --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models | ||
53 | -# --outputModelFile SVM-lineal-model.mod | ||
54 | -# --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports | ||
55 | -# --outputReportFile SVM-lineal.txt | ||
56 | -# --classifier SVM | ||
57 | -# --saveData | ||
58 | -# --kernel linear | ||
59 | -# --reduction SVD200 | ||
60 | -# --removeStopWords | ||
61 | - | ||
62 | # source activate python3 | 47 | # source activate python3 |
63 | # python training-crossvalidation-testing-dom.py | 48 | # python training-crossvalidation-testing-dom.py |
64 | # --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset | 49 | # --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset |
... | @@ -75,7 +60,8 @@ __author__ = 'CMendezC' | ... | @@ -75,7 +60,8 @@ __author__ = 'CMendezC' |
75 | # --kernel linear | 60 | # --kernel linear |
76 | # --reduction SVD200 | 61 | # --reduction SVD200 |
77 | # --removeStopWords | 62 | # --removeStopWords |
78 | -# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear | 63 | +# --vectorizer b |
64 | +# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear --saveData --vectorizer b | ||
79 | # --reduction SVD200 | 65 | # --reduction SVD200 |
80 | # --removeStopWords | 66 | # --removeStopWords |
81 | 67 | ||
... | @@ -124,6 +110,9 @@ if __name__ == "__main__": | ... | @@ -124,6 +110,9 @@ if __name__ == "__main__": |
124 | parser.add_argument("--ngrfinal", type=int, | 110 | parser.add_argument("--ngrfinal", type=int, |
125 | dest="ngrfinal", default=1, | 111 | dest="ngrfinal", default=1, |
126 | help="Final n-gram", metavar="INTEGER") | 112 | help="Final n-gram", metavar="INTEGER") |
113 | + parser.add_argument("--vectorizer", dest="vectorizer", required=True, | ||
114 | + help="Vectorizer: b=binary, f=frequency, t=tf-idf", metavar="CHAR", | ||
115 | + choices=('b', 'f', 't'), default='b') | ||
127 | 116 | ||
128 | args = parser.parse_args() | 117 | args = parser.parse_args() |
129 | 118 | ||
... | @@ -145,6 +134,7 @@ if __name__ == "__main__": | ... | @@ -145,6 +134,7 @@ if __name__ == "__main__": |
145 | print("Remove stop words: " + str(args.removeStopWords)) | 134 | print("Remove stop words: " + str(args.removeStopWords)) |
146 | print("Initial ngram: " + str(args.ngrinitial)) | 135 | print("Initial ngram: " + str(args.ngrinitial)) |
147 | print("Final ngram: " + str(args.ngrfinal)) | 136 | print("Final ngram: " + str(args.ngrfinal)) |
137 | + print("Vectorizer: " + str(args.vectorizer)) | ||
148 | 138 | ||
149 | # Start time | 139 | # Start time |
150 | t0 = time() | 140 | t0 = time() | ... | ... |
-
Please register or login to post a comment