nei-demo-backup

Sleeping

App Files Files Community

SmitaGautam committed on Nov 3

Commit

762a449

•

1 Parent(s): 981d617

Update train.py

Browse files

Files changed (1) hide show

train.py +21 -97

train.py CHANGED Viewed

@@ -37,105 +37,29 @@ pos_tags = [ 'CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD',
                 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
             ]
-def feature_vector(w, scaled_position, pos_tag):  # removed version: hand-built 12-slot feature vector for token w
-    vec = np.zeros(12).astype(np.float32)  # 12 float32 slots, filled by index at the bottom of the function
-    #if w[0].isupper():
-        #title = 1
-    #else:
-        #title = 0
-    if w.isupper():  # all-caps flag
-        allcaps = 1
-    else:
-        allcaps = 0
-    if w in PUNCT:  # PUNCT is defined outside this view
-        punct = 1
-    else:
-        punct = 0
-    if w.lower() in stopwords:  # stop-word lookup is done on the lower-cased token
-      sw=1
-    else:
-      sw=0
-    if w.isdigit():
-      is_digit=1
-    else:
-      is_digit=0
-    if pos_tag in ('VB','VBD','VBG','VBN','VBP','VBZ'):  # here pos_tag is compared against tag strings, not indices
-      is_verb=1
-    else:
-      is_verb=0
-    #if pos_tag in ('NN','NNP','NNPS','NNS'):
-    if pos_tag in ('NNP','NNPS'):  # only NNP/NNPS set the flag; the wider NN/NNS variant is commented out above
-      is_noun=1
-    else:
-      is_noun=0
-    if w in places:  # places/people/countries/nationalities are defined outside this view; lookup uses w as-is (no lower-casing)
-      is_place=1
-    else:
-      is_place=0
-    if w in people:
-      is_people=1
-    else:
-      is_people=0
-    if w in countries:
-      is_country=1
     else:
-      is_country=0
-    if w in nationalities:
-      is_nation=1
     else:
-      is_nation=0
-    # Build vector
-    #vec[0] = title
-    vec[0] = allcaps
-    vec[1] = len(w)  # raw character count of the token
-    vec[2] = punct
-    vec[3] = scaled_position  # stored exactly as passed in by the caller
-    vec[4] = sw
-    vec[5] = is_digit
-    vec[6] = is_verb
-    vec[7] = is_noun
-    vec[8] = is_place
-    vec[9] = is_people
-    vec[10] = is_country
-    vec[11] = is_nation
-    return vec
-def feature_vector_d(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):  # removed: 116-slot vector = 5 word features + three 37-wide tag blocks
-    vec = np.zeros(116).astype('float32')
-    if(word.istitle()):  # slot 0: title-case flag
-        vec[0] = 1
-    if word.lower() in stopwords:  # slot 1: stop-word flag, checked on the lower-cased word
-        vec[1] = 1
-    if(word.isupper()):  # slot 2: all-caps flag
-        vec[2] = 1
-    vec[3] = len(word)  # slot 3: raw character count
-    vec[4] = word.isdigit()  # slot 4: bool stored into the float32 array as 0.0/1.0
-    if prev_word_pos_tag!=-1:  # -1 leaves the whole block at zero; presumably means "no tag" - verify against caller
-      vec[5+prev_word_pos_tag] = 1  # previous-word tag block starts at offset 5
-    if next_word_pos_tag!=-1:
-      vec[42+next_word_pos_tag] = 1  # next-word tag block starts at offset 42
-    if current_word_pos_tag!=-1:
-      vec[79+current_word_pos_tag] = 1  # current-word tag block starts at offset 79 and runs to the end (116)
-    return vec
 def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):

                 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB'
             ]
+def feature_vector(word, scaled_position, current_word_pos_tag):  # returns a float32 array of 12 features, in the order appended below
+    features = []
+    features.append(int(word.lower() in stopwords))  # stop-word flag, checked on the lower-cased word
+    features.append(int(word.isupper()))  # all-caps flag
+    features.append(int(word in PUNCT))  # PUNCT is defined outside this view
+    features.append(int(word.istitle()))  # title-case flag
+    features.append(int(word.isdigit()))
+    # features.append(len(word))
+    features.append(int(word in places))  # places/people/countries/nationalities are defined outside this view; lookup uses word as-is
+    features.append(int(word in people))
+    features.append(int(word in countries))
+    features.append(int(word in nationalities))
+    if (current_word_pos_tag==12) or (current_word_pos_tag==13): ##NNP, NNPS (tag is an integer here; presumably an index into pos_tags - verify against caller)
+        features.append(1)
     else:
+        features.append(0)
+    features.append(scaled_position)  # appended exactly as passed in by the caller
+    if 27 <= current_word_pos_tag <= 32: ##isVERB - NOTE(review): range depends on pos_tags ordering; confirm 27..32 covers exactly VB..VBZ
+        features.append(1)
     else:
+        features.append(0)
+    return np.asarray(features, dtype = np.float32)
 def feature_vector2(word, prev_word_pos_tag, next_word_pos_tag, current_word_pos_tag):