Training, crossvalidation and testing structural domain dataset
Showing
1 changed file
with
7 additions
and
17 deletions
| ... | @@ -36,6 +36,7 @@ __author__ = 'CMendezC' | ... | @@ -36,6 +36,7 @@ __author__ = 'CMendezC' |
| 36 | # 11) --kernel Kernel | 36 | # 11) --kernel Kernel |
| 37 | # 12) --reduction Feature selection or dimensionality reduction | 37 | # 12) --reduction Feature selection or dimensionality reduction |
| 38 | # 13) --removeStopWords Remove most frequent words | 38 | # 13) --removeStopWords Remove most frequent words |
| 39 | +# 14) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf. | ||
| 39 | 40 | ||
| 40 | 41 | ||
| 41 | # Output: | 42 | # Output: |
| ... | @@ -43,22 +44,6 @@ __author__ = 'CMendezC' | ... | @@ -43,22 +44,6 @@ __author__ = 'CMendezC' |
| 43 | 44 | ||
| 44 | # Execution: | 45 | # Execution: |
| 45 | 46 | ||
| 46 | -# python training-crossvalidation-testing-dom.py | ||
| 47 | -# --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset | ||
| 48 | -# --inputTrainingData trainData.txt | ||
| 49 | -# --inputTrainingClasses trainClasses.txt | ||
| 50 | -# --inputTestingData testData.txt | ||
| 51 | -# --inputTestingClasses testClasses.txt | ||
| 52 | -# --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models | ||
| 53 | -# --outputModelFile SVM-lineal-model.mod | ||
| 54 | -# --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports | ||
| 55 | -# --outputReportFile SVM-lineal.txt | ||
| 56 | -# --classifier SVM | ||
| 57 | -# --saveData | ||
| 58 | -# --kernel linear | ||
| 59 | -# --reduction SVD200 | ||
| 60 | -# --removeStopWords | ||
| 61 | - | ||
| 62 | # source activate python3 | 47 | # source activate python3 |
| 63 | # python training-crossvalidation-testing-dom.py | 48 | # python training-crossvalidation-testing-dom.py |
| 64 | # --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset | 49 | # --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset |
| ... | @@ -75,7 +60,8 @@ __author__ = 'CMendezC' | ... | @@ -75,7 +60,8 @@ __author__ = 'CMendezC' |
| 75 | # --kernel linear | 60 | # --kernel linear |
| 76 | # --reduction SVD200 | 61 | # --reduction SVD200 |
| 77 | # --removeStopWords | 62 | # --removeStopWords |
| 78 | -# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear | 63 | +# --vectorizer b |
| 64 | +# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear --saveData --vectorizer b | ||
| 79 | # --reduction SVD200 | 65 | # --reduction SVD200 |
| 80 | # --removeStopWords | 66 | # --removeStopWords |
| 81 | 67 | ||
| ... | @@ -124,6 +110,9 @@ if __name__ == "__main__": | ... | @@ -124,6 +110,9 @@ if __name__ == "__main__": |
| 124 | parser.add_argument("--ngrfinal", type=int, | 110 | parser.add_argument("--ngrfinal", type=int, |
| 125 | dest="ngrfinal", default=1, | 111 | dest="ngrfinal", default=1, |
| 126 | help="Final n-gram", metavar="INTEGER") | 112 | help="Final n-gram", metavar="INTEGER") |
| 113 | + parser.add_argument("--vectorizer", dest="vectorizer", required=True, | ||
| 114 | + help="Vectorizer: b=binary, f=frequency, t=tf-idf", metavar="CHAR", | ||
| 115 | + choices=('b', 'f', 't'), default='b') | ||
| 127 | 116 | ||
| 128 | args = parser.parse_args() | 117 | args = parser.parse_args() |
| 129 | 118 | ||
| ... | @@ -145,6 +134,7 @@ if __name__ == "__main__": | ... | @@ -145,6 +134,7 @@ if __name__ == "__main__": |
| 145 | print("Remove stop words: " + str(args.removeStopWords)) | 134 | print("Remove stop words: " + str(args.removeStopWords)) |
| 146 | print("Initial ngram: " + str(args.ngrinitial)) | 135 | print("Initial ngram: " + str(args.ngrinitial)) |
| 147 | print("Final ngram: " + str(args.ngrfinal)) | 136 | print("Final ngram: " + str(args.ngrfinal)) |
| 137 | + print("Vectorizer: " + str(args.vectorizer)) | ||
| 148 | 138 | ||
| 149 | # Start time | 139 | # Start time |
| 150 | t0 = time() | 140 | t0 = time() | ... | ... |
-
Please register or login to post a comment