Carlos-Francisco Méndez-Cruz

Conditional Random Fields

...@@ -64,20 +64,20 @@ def word2features(sent, i): ...@@ -64,20 +64,20 @@ def word2features(sent, i):
64 64
65 features = { 65 features = {
66 # Suffixes 66 # Suffixes
67 - 'word[-3:]': word[-3:], 67 + #'word[-3:]': word[-3:],
68 - 'word[-2:]': word[-2:], 68 + #'word[-2:]': word[-2:],
69 - 'word[-1:]': word[-1:], 69 + #'word[-1:]': word[-1:],
70 #'word.isupper()': word.isupper(), 70 #'word.isupper()': word.isupper(),
71 - 'word': word, 71 + #'word': word,
72 'lemma': lemma, 72 'lemma': lemma,
73 'postag': postag, 73 'postag': postag,
74 - 'lemma[-3:]': lemma[-3:], 74 + #'lemma[-3:]': lemma[-3:],
75 - 'lemma[-2:]': lemma[-2:], 75 + #'lemma[-2:]': lemma[-2:],
76 - 'lemma[-1:]': lemma[-1:], 76 + #'lemma[-1:]': lemma[-1:],
77 - 'word[:3]': word[:3], 77 + #'word[:3]': word[:3],
78 - 'word[:2]': word[:2], 78 + #'word[:2]': word[:2],
79 - 'word[:1]': word[:1], 79 + #'word[:1]': word[:1],
80 - 'endsConLow()={}'.format(endsConLow(word)): endsConLow(word), 80 + #'endsConLow()={}'.format(endsConLow(word)): endsConLow(word),
81 } 81 }
82 if i > 0: 82 if i > 0:
83 listElem = sent[i - 1].split('|') 83 listElem = sent[i - 1].split('|')
...@@ -85,7 +85,7 @@ def word2features(sent, i): ...@@ -85,7 +85,7 @@ def word2features(sent, i):
85 lemma1 = listElem[1] 85 lemma1 = listElem[1]
86 postag1 = listElem[2] 86 postag1 = listElem[2]
87 features.update({ 87 features.update({
88 - '-1:word': word1, 88 + #'-1:word': word1,
89 '-1:lemma': lemma1, 89 '-1:lemma': lemma1,
90 '-1:postag': postag1, 90 '-1:postag': postag1,
91 }) 91 })
...@@ -96,7 +96,7 @@ def word2features(sent, i): ...@@ -96,7 +96,7 @@ def word2features(sent, i):
96 lemma1 = listElem[1] 96 lemma1 = listElem[1]
97 postag1 = listElem[2] 97 postag1 = listElem[2]
98 features.update({ 98 features.update({
99 - '+1:word': word1, 99 + #'+1:word': word1,
100 '+1:lemma': lemma1, 100 '+1:lemma': lemma1,
101 '+1:postag': postag1, 101 '+1:postag': postag1,
102 }) 102 })
...@@ -259,8 +259,8 @@ if __name__ == "__main__": ...@@ -259,8 +259,8 @@ if __name__ == "__main__":
259 259
260 print("Reading corpus done in: %fs" % (time() - t0)) 260 print("Reading corpus done in: %fs" % (time() - t0))
261 261
262 - print(sent2features(sentencesTrainingData[0])[0]) 262 + #print(sent2features(sentencesTrainingData[0])[0])
263 - print(sent2features(sentencesTestData[0])[0]) 263 + #print(sent2features(sentencesTestData[0])[0])
264 t0 = time() 264 t0 = time()
265 265
266 X_train = [sent2features(s) for s in sentencesTrainingData] 266 X_train = [sent2features(s) for s in sentencesTrainingData]
......