Carlos-Francisco Méndez-Cruz

Conditional Random Fields

......@@ -64,20 +64,20 @@ def word2features(sent, i):
 features = {
 # Suffixes
-'word[-3:]': word[-3:],
-'word[-2:]': word[-2:],
-'word[-1:]': word[-1:],
+#'word[-3:]': word[-3:],
+#'word[-2:]': word[-2:],
+#'word[-1:]': word[-1:],
 #'word.isupper()': word.isupper(),
-'word': word,
+#'word': word,
 'lemma': lemma,
 'postag': postag,
-'lemma[-3:]': lemma[-3:],
-'lemma[-2:]': lemma[-2:],
-'lemma[-1:]': lemma[-1:],
-'word[:3]': word[:3],
-'word[:2]': word[:2],
-'word[:1]': word[:1],
-'endsConLow()={}'.format(endsConLow(word)): endsConLow(word),
+#'lemma[-3:]': lemma[-3:],
+#'lemma[-2:]': lemma[-2:],
+#'lemma[-1:]': lemma[-1:],
+#'word[:3]': word[:3],
+#'word[:2]': word[:2],
+#'word[:1]': word[:1],
+#'endsConLow()={}'.format(endsConLow(word)): endsConLow(word),
 }
 if i > 0:
 listElem = sent[i - 1].split('|')
......@@ -85,7 +85,7 @@ def word2features(sent, i):
 lemma1 = listElem[1]
 postag1 = listElem[2]
 features.update({
-'-1:word': word1,
+#'-1:word': word1,
 '-1:lemma': lemma1,
 '-1:postag': postag1,
 })
......@@ -96,7 +96,7 @@ def word2features(sent, i):
 lemma1 = listElem[1]
 postag1 = listElem[2]
 features.update({
-'+1:word': word1,
+#'+1:word': word1,
 '+1:lemma': lemma1,
 '+1:postag': postag1,
 })
......@@ -259,8 +259,8 @@ if __name__ == "__main__":
 print("Reading corpus done in: %fs" % (time() - t0))
-print(sent2features(sentencesTrainingData[0])[0])
-print(sent2features(sentencesTestData[0])[0])
+#print(sent2features(sentencesTrainingData[0])[0])
+#print(sent2features(sentencesTestData[0])[0])
 t0 = time()
 X_train = [sent2features(s) for s in sentencesTrainingData]
......