Carlos-Francisco Méndez-Cruz

Conditional Random Fields

@@ -39,7 +39,7 @@ from nltk.corpus import stopwords
 # 1) Best model
 
 # Examples
-# python3.4 training-validation-v1.py
+# python training-validation-v1.py
 # --inputPath /export/space1/users/compu2/bionlp/conditional-random-fields/data-sets
 # --trainingFile training-data-set-35.txt
 # --testFile test-data-set-30.txt
@@ -58,29 +58,29 @@ def word2features(sent, i):
     postag = listElem[2]
 
     features = {
-        'word': word,
-        #'lemma': lemma,
+        #'word': word,
+        'lemma': lemma,
         #'postag': postag,
     }
     if i > 0:
         listElem = sent[i - 1].split('|')
-        word1 = listElem[0]
+        #word1 = listElem[0]
         lemma1 = listElem[1]
         postag1 = listElem[2]
         features.update({
-            '-1:word': word1,
-            #'-1:lemma': lemma1,
+            #'-1:word': word1,
+            '-1:lemma': lemma1,
             #'-1:postag': postag1,
         })
 
     if i < len(sent) - 1:
         listElem = sent[i + 1].split('|')
-        word1 = listElem[0]
+        #word1 = listElem[0]
         lemma1 = listElem[1]
         postag1 = listElem[2]
         features.update({
-            '+1:word': word1,
-            #'+1:lemma': lemma1,
+            #'+1:word': word1,
+            '+1:lemma': lemma1,
             #'+1:postag': postag1,
         })
     return features
@@ -234,7 +234,7 @@ if __name__ == "__main__":
     # crf = rs.best_estimator_
     nameReport = options.trainingFile.replace('.txt', '.fStopWords_' + str(options.excludeStopWords) + '.fSymbols_' + str(
         options.excludeSymbols) + '.txt')
-    with open(os.path.join(options.outputPath, "reports", "report_" + nameReport), mode="w") as oFile:
+    with open(os.path.join(options.outputPath, "reports-l", "report_" + nameReport), mode="w") as oFile:
         oFile.write("********** TRAINING **********\n")
         oFile.write("Training file: " + options.trainingFile + '\n')
         oFile.write('\n')
......