Carlos-Francisco Méndez-Cruz

Prepare abstracts

......@@ -20,7 +20,7 @@ __author__ = 'CMendezC'
# Execution:
# python3 prepare-abstracts.py
# --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
# --inputFile text-annotated-abstracts-original.txt
# --inputFile text-annotated-abstracts.txt
# --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original
# --dicPath /export/space1/users/compu2/bionlp/nlp-preprocessing-pipeline/dictionaries
# --dicFile genes.txt
......@@ -64,7 +64,7 @@ if __name__ == "__main__":
with open(os.path.join(options.inputPath, options.inputFile), "r", encoding="utf-8", errors="replace") as iFile:
print("Reading file..." + options.inputFile)
for line in iFile:
line = line.strip('\n')
line = line.strip('\r\n')
for gene in reGene.findall(line):
# print("genes: {}".format(gene))
if gene not in hashGenes:
......