Carlos-Francisco Méndez-Cruz

Obtaining training and test data sets

...@@ -43,6 +43,7 @@ from nltk.corpus import stopwords ...@@ -43,6 +43,7 @@ from nltk.corpus import stopwords
43 # --trainingFile training-data-set-70.txt 43 # --trainingFile training-data-set-70.txt
44 # --testFile test-data-set-30.txt 44 # --testFile test-data-set-30.txt
45 # --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/reports 45 # --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/reports
46 +# python3.4 training-validation-v1.py --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/reports
46 47
47 ################################# 48 #################################
48 # FUNCTIONS # 49 # FUNCTIONS #
...@@ -358,7 +359,7 @@ if __name__ == "__main__": ...@@ -358,7 +359,7 @@ if __name__ == "__main__":
358 continue 359 continue
359 listLine.append(token) 360 listLine.append(token)
360 sentencesTrainingData.append(listLine) 361 sentencesTrainingData.append(listLine)
361 - print " Sentences training data: " + str(len(sentencesTrainingData)) 362 + print(" Sentences training data: " + str(len(sentencesTrainingData)))
362 # print sentencesTrainingData[0] 363 # print sentencesTrainingData[0]
363 364
364 with open(os.path.join(options.inputPath, options.testFile), "r") as iFile: 365 with open(os.path.join(options.inputPath, options.testFile), "r") as iFile:
...@@ -382,7 +383,7 @@ if __name__ == "__main__": ...@@ -382,7 +383,7 @@ if __name__ == "__main__":
382 continue 383 continue
383 listLine.append(token) 384 listLine.append(token)
384 sentencesTestData.append(listLine) 385 sentencesTestData.append(listLine)
385 - print " Sentences test data: " + str(len(sentencesTestData)) 386 + print(" Sentences test data: " + str(len(sentencesTestData)))
386 # print sentencesTestData[0] 387 # print sentencesTestData[0]
387 388
388 print("Reading corpus done in: %fs" % (time() - t0)) 389 print("Reading corpus done in: %fs" % (time() - t0))
......