Showing 1 changed file with 6 additions and 5 deletions
@@ -332,14 +332,15 @@ if __name__ == "__main__":
     sentencesTrainingData = []
     sentencesTestData = []

-    stopwords = [word.decode('utf-8') for word in stopwords.words('english')]
+    # Original: stopwords = [word.decode('utf-8') for word in stopwords.words('english')]
+    stopwords = [word for word in stopwords.words('english')]

     with open(os.path.join(options.inputPath, options.trainingFile), "r") as iFile:
     # with open(os.path.join(options.inputPath, options.trainingFile), "r", encoding="utf-8", errors='replace') as iFile:
         for line in iFile.readlines():
             listLine = []
-            line = line.decode("utf-8")
-            for token in line.strip('\n').split():
+            line = line.strip('\n')
+            for token in line.split():
                 if options.filterStopWords:
                     listToken = token.split('|')
                     lemma = listToken[1]
@@ -366,8 +367,8 @@ if __name__ == "__main__":
     # with open(os.path.join(options.inputPath, options.testFile), "r", encoding="utf-8", errors='replace') as iFile:
         for line in iFile.readlines():
             listLine = []
-            line = line.decode("utf-8")
-            for token in line.strip('\n').split():
+            line = line.strip('\n')
+            for token in line.split():
                 if options.filterStopWords:
                     listToken = token.split('|')
                     lemma = listToken[1]
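These edits port the block from Python 2 to Python 3: text read through open() and the NLTK stopword list already come back as str, so the explicit .decode('utf-8') calls are dropped. Below is a minimal sketch of the same reading pattern under Python 3, using the explicit-encoding open() that the commented-out lines suggest; the helper name load_sentences and the word|lemma|tag token layout (implied by listToken[1]) are assumptions, not part of the original file.

import os
from nltk.corpus import stopwords as nltk_stopwords

def load_sentences(path, filter_stop_words=False):
    # In Python 3, stopwords.words('english') already returns str, so no decode is needed.
    stop_words = set(nltk_stopwords.words('english'))
    sentences = []
    # An explicit encoding on open() replaces the old per-line line.decode("utf-8") calls.
    with open(path, "r", encoding="utf-8", errors="replace") as iFile:
        for line in iFile:
            listLine = []
            line = line.strip('\n')
            for token in line.split():
                listToken = token.split('|')  # assumed word|lemma|tag layout
                lemma = listToken[1]
                if filter_stop_words and lemma in stop_words:
                    continue
                listLine.append(token)
            sentences.append(listLine)
    return sentences

# Example usage with names taken from the diff (options is assumed to come from the script's argument parsing):
# sentencesTrainingData = load_sentences(os.path.join(options.inputPath, options.trainingFile),
#                                        filter_stop_words=options.filterStopWords)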