Carlos-Francisco Méndez-Cruz

Prepare abstracts

......@@ -22,6 +22,8 @@ __author__ = 'CMendezC'
# --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
# --inputFile text-annotated-abstracts-original.txt
# --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original
# --dicPath /export/space1/users/compu2/bionlp/nlp-preprocessing-pipeline/dictionaries
# --dicFile genes.txt
# python3 prepare-abstracts.py --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets --inputFile text-annotated-abstracts-original.txt --outputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets/original --dicPath /export/space1/users/compu2/bionlp/nlp-preprocessing-pipeline/dictionaries --dicFile genes.txt
if __name__ == "__main__":
......@@ -33,6 +35,10 @@ if __name__ == "__main__":
help="Input file", metavar="FILE")
parser.add_option("--outputPath", dest="outputPath",
help="Output path", metavar="PATH")
parser.add_option("--dicPath", dest="dicPath",
help="Dictionary path", metavar="PATH")
parser.add_option("--dicFile", dest="dicFile",
help="Dictionary file", metavar="FILE")
(options, args) = parser.parse_args()
if len(args) > 0:
......@@ -44,6 +50,8 @@ if __name__ == "__main__":
print("Input path: " + str(options.inputPath))
print("Input file", str(options.inputFile))
print("Output path: " + str(options.outputPath))
print("Dictionary path: " + str(options.dicPath))
print("Dictionary file", str(options.dicFile))
filesWritten = 0
t0 = time()
......@@ -69,6 +77,8 @@ if __name__ == "__main__":
oFile.write(line)
else:
print("Warning: line without PMID")
with open(os.path.join(options.dicPath, options.dicFile), "w", encoding="utf-8", errors="replace") as dFile:
for gene in hashGenes.keys():
dFile.write("{}\n".format(gene))
......