Carlos-Francisco Méndez-Cruz

Training, crossvalidation and testing structural domain dataset

......@@ -36,6 +36,7 @@ __author__ = 'CMendezC'
# 11) --kernel Kernel
# 12) --reduction Feature selection or dimensionality reduction
# 13) --removeStopWords Remove most frequent words
# 14) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf.
# Output:
......@@ -43,22 +44,6 @@ __author__ = 'CMendezC'
# Execution:
# python training-crossvalidation-testing-dom.py
# --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset
# --inputTrainingData trainData.txt
# --inputTrainingClasses trainClasses.txt
# --inputTestingData testData.txt
# --inputTestingClasses testClasses.txt
# --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models
# --outputModelFile SVM-lineal-model.mod
# --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports
# --outputReportFile SVM-lineal.txt
# --classifier SVM
# --saveData
# --kernel linear
# --reduction SVD200
# --removeStopWords
# source activate python3
# python training-crossvalidation-testing-dom.py
# --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset
......@@ -75,7 +60,8 @@ __author__ = 'CMendezC'
# --kernel linear
# --reduction SVD200
# --removeStopWords
# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear
# --vectorizer b
# python training-crossvalidation-testing-dom.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset --inputTrainingData trainData.txt --inputTrainingClasses trainClasses.txt --inputTestingData testData.txt --inputTestingClasses testClasses.txt --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/models --outputModelFile SVM-lineal-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/structural-domain-dataset/reports --outputReportFile SVM-lineal.txt --classifier SVM --kernel linear --saveData --vectorizer b
# --reduction SVD200
# --removeStopWords
......@@ -124,6 +110,9 @@ if __name__ == "__main__":
# Final n-gram option: integer, defaults to 1 when omitted.
parser.add_argument("--ngrfinal", type=int,
                    dest="ngrfinal", default=1,
                    help="Final n-gram", metavar="INTEGER")
# Vectorizer selection, restricted to 'b' (binary), 'f' (frequency) or
# 't' (tf-idf). The option is deliberately NOT marked required: argparse
# never applies `default` to a required option, so combining
# required=True with default='b' made the default unreachable and forced
# every invocation to pass --vectorizer. Leaving it optional lets the
# declared default 'b' take effect while explicit values keep working.
parser.add_argument("--vectorizer", dest="vectorizer",
                    help="Vectorizer: b=binary, f=frequency, t=tf-idf", metavar="CHAR",
                    choices=('b', 'f', 't'), default='b')
# Parse the command line into the `args` namespace used below.
args = parser.parse_args()
......@@ -145,6 +134,7 @@ if __name__ == "__main__":
# Echo these run parameters to stdout, one "label: value" line each,
# in the same order as the individual print calls they replace.
for label, value in (
        ("Remove stop words: ", args.removeStopWords),
        ("Initial ngram: ", args.ngrinitial),
        ("Final ngram: ", args.ngrfinal),
        ("Vectorizer: ", args.vectorizer),
):
    print(label + str(value))
# Start time
t0 = time()
......