Conditional Random Fields

Carlos-Francisco Méndez-Cruz
Commit 044de01f3d97cd03064fb5772697f67cbc041752 044de01f 1 parent 1a4c85e9
Showing 3 changed files with 5 additions and 71 deletions
training_validation_v1-1.py
training_validation_v2.py
training_validation_v3.py
--- a/training_validation_v1-1.py 0 → 100644
View file @044de01
+++ b/training_validation_v1-1.py 0 → 100644
View file @044de01
--- a/training_validation_v2.py
View file @044de01
+++ b/training_validation_v2.py
View file @044de01
@@ -49,42 +49,21 @@ from nltk.corpus import stopwords
 #################################
 #           FUNCTIONS           #
 #################################
- def endsConLow(word):
-     miregex = re.compile(r'[^aeiouA-Z0-9]$')
-     if miregex.search(word):
-         return True
-     else:
-         return False
- 
 def word2features(sent, i):
     listElem = sent[i].split('|')
     word = listElem[0]
+     #print("word: {}".format(word))
     lemma = listElem[1]
     postag = listElem[2]
 
     features = {
-         # Suffixes
-         #'word[-3:]': word[-3:],
-         #'word[-2:]': word[-2:],
-         #'word[-1:]': word[-1:],
-         #'word.isupper()': word.isupper(),
         #'word': word,
-         #'lemma': lemma,
-         #'postag': postag,
-         'lemma[-3:]': lemma[-3:],
-         'lemma[-2:]': lemma[-2:],
-         'lemma[-1:]': lemma[-1:],
-         'lemma[+3:]': lemma[:3],
-         'lemma[+2:]': lemma[:2],
-         'lemma[+1:]': lemma[:1],
-         #'word[:3]': word[:3],
-         #'word[:2]': word[:2],
-         #'word[:1]': word[:1],
-         #'endsConLow()={}'.format(endsConLow(word)): endsConLow(word),
+         'lemma': lemma,
+         'postag': postag,
     }
     if i > 0:
         listElem = sent[i - 1].split('|')
-         word1 = listElem[0]
+         #word1 = listElem[0]
         lemma1 = listElem[1]
         postag1 = listElem[2]
         features.update({
@@ -95,7 +74,7 @@ def word2features(sent, i):
 
     if i < len(sent) - 1:
         listElem = sent[i + 1].split('|')
-         word1 = listElem[0]
+         #word1 = listElem[0]
         lemma1 = listElem[1]
         postag1 = listElem[2]
         features.update({
@@ -103,53 +82,8 @@ def word2features(sent, i):
             '+1:lemma': lemma1,
             '+1:postag': postag1,
         })
- 
-     '''    
-     if i > 1:
-         listElem = sent[i - 2].split('|')
-         word2 = listElem[0]
-         lemma2 = listElem[1]
-         postag2 = listElem[2]
-         features.update({
-             '-2:word': word2,
-             '-2:lemma': lemma2,
-         })
- 
-     if i < len(sent) - 2:
-         listElem = sent[i + 2].split('|')
-         word2 = listElem[0]
-         lemma2 = listElem[1]
-         postag2 = listElem[2]
-         features.update({
-             '+2:word': word2,
-             '+2:lemma': lemma2,
-         })
- 
-     trigrams = False
-     if trigrams:
-         if i > 2:
-             listElem = sent[i - 3].split('|')
-             word3 = listElem[0]
-             lemma3 = listElem[1]
-             postag3 = listElem[2]
-             features.update({
-                 '-3:word': word3,
-                 '-3:lemma': lemma3,
-             })
- 
-         if i < len(sent) - 3:
-             listElem = sent[i + 3].split('|')
-             word3 = listElem[0]
-             lemma3 = listElem[1]
-             postag3 = listElem[2]
-             features.update({
-                 '+3:word': word3,
-                 '+3:lemma': lemma3,
-             })
-     '''
     return features
 
- 
 def sent2features(sent):
     """Build the feature dictionary for every token position of a sentence.
 
     Args:
         sent: Sequence of tokens. Each position is handed, together with
             the whole sentence, to ``word2features``, which splits the
             token on ``'|'`` into word, lemma and POS tag.
 
     Returns:
         A list with ``word2features(sent, i)`` for each index ``i`` of
         ``sent``, in sentence order.
     """
     features_per_token = []
     for position in range(len(sent)):
         features_per_token.append(word2features(sent, position))
     return features_per_token
 
--- a/training_validation_v3.py 0 → 100644
View file @044de01
+++ b/training_validation_v3.py 0 → 100644
View file @044de01