Carlos-Francisco Méndez-Cruz

Conditional Random Fields

......@@ -39,7 +39,7 @@ from nltk.corpus import stopwords
# 1) Best model
# Examples
# python3.4 training-validation-v1.py
# python training-validation-v1.py
# --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
# --trainingFile training-data-set-35.txt
# --testFile test-data-set-30.txt
......@@ -58,29 +58,29 @@ def word2features(sent, i):
postag = listElem[2]
features = {
'word': word,
#'lemma': lemma,
#'word': word,
'lemma': lemma,
#'postag': postag,
}
if i > 0:
listElem = sent[i - 1].split('|')
word1 = listElem[0]
#word1 = listElem[0]
lemma1 = listElem[1]
postag1 = listElem[2]
features.update({
'-1:word': word1,
#'-1:lemma': lemma1,
#'-1:word': word1,
'-1:lemma': lemma1,
#'-1:postag': postag1,
})
if i < len(sent) - 1:
listElem = sent[i + 1].split('|')
word1 = listElem[0]
#word1 = listElem[0]
lemma1 = listElem[1]
postag1 = listElem[2]
features.update({
'+1:word': word1,
#'+1:lemma': lemma1,
#'+1:word': word1,
'+1:lemma': lemma1,
#'+1:postag': postag1,
})
return features
......@@ -234,7 +234,7 @@ if __name__ == "__main__":
# crf = rs.best_estimator_
nameReport = options.trainingFile.replace('.txt', '.fStopWords_' + str(options.excludeStopWords) + '.fSymbols_' + str(
options.excludeSymbols) + '.txt')
with open(os.path.join(options.outputPath, "reports", "report_" + nameReport), mode="w") as oFile:
with open(os.path.join(options.outputPath, "reports-l", "report_" + nameReport), mode="w") as oFile:
oFile.write("********** TRAINING **********\n")
oFile.write("Training file: " + options.trainingFile + '\n')
oFile.write('\n')
......