Carlos-Francisco Méndez-Cruz

Obtaining training and test data sets

@@ -43,6 +43,7 @@ from nltk.corpus import stopwords
# --trainingFile training-data-set-70.txt
# --testFile test-data-set-30.txt
# --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/reports
# python3.4 training-validation-v1.py --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/reports
#################################
# FUNCTIONS #
@@ -358,7 +359,7 @@ if __name__ == "__main__":
continue
listLine.append(token)
sentencesTrainingData.append(listLine)
print " Sentences training data: " + str(len(sentencesTrainingData))
print(" Sentences training data: " + str(len(sentencesTrainingData)))
# print sentencesTrainingData[0]
with open(os.path.join(options.inputPath, options.testFile), "r") as iFile:
@@ -382,7 +383,7 @@ if __name__ == "__main__":
continue
listLine.append(token)
sentencesTestData.append(listLine)
print " Sentences test data: " + str(len(sentencesTestData))
print(" Sentences test data: " + str(len(sentencesTestData)))
# print sentencesTestData[0]
print("Reading corpus done in: %fs" % (time() - t0))
......