Spaces:
Sleeping
Sleeping
madhavkotecha
committed on
Commit
•
59d8c9f
1
Parent(s):
ce62bb3
Update app.py
Browse files
app.py
CHANGED
@@ -94,12 +94,12 @@ class NEI:
|
|
94 |
# plt.colorbar()
|
95 |
# plt.savefig('Confusion_Matrix.png')
|
96 |
|
97 |
-
def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0
|
98 |
is_titlecase = 1 if w[0].isupper() else 0
|
99 |
is_allcaps = 1 if w.isupper() else 0
|
100 |
is_sw = 1 if w.lower() in SW else 0
|
101 |
is_punct = 1 if w in PUNCT else 0
|
102 |
-
is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
|
103 |
is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
|
104 |
# is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
|
105 |
# is_start_of_sentence = 1 if scaled_position == 0 else 0
|
@@ -113,7 +113,7 @@ class NEI:
|
|
113 |
for i, token in enumerate(tokens):
|
114 |
prev_tag = tags[i - 1] if i > 0 else 0
|
115 |
next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
|
116 |
-
x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag
|
117 |
y = 1 if tags[i] > 0 else 0
|
118 |
features.append(x)
|
119 |
labels.append(y)
|
@@ -140,7 +140,7 @@ class NEI:
|
|
140 |
tokens = word_tokenize(sentence)
|
141 |
features = []
|
142 |
|
143 |
-
raw_features = [self.vectorize(token, i / len(tokens)
|
144 |
raw_features = np.array(raw_features, dtype=np.float32)
|
145 |
scaled_features = self.scaler.transform(raw_features)
|
146 |
y_pred = self.model.predict(scaled_features)
|
@@ -149,7 +149,7 @@ class NEI:
|
|
149 |
prev_tag = y_pred[i - 1] if i > 0 else 0
|
150 |
next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
|
151 |
|
152 |
-
feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag
|
153 |
features.append(feature_with_context)
|
154 |
|
155 |
features = np.array(features, dtype=np.float32)
|
|
|
94 |
# plt.colorbar()
|
95 |
# plt.savefig('Confusion_Matrix.png')
|
96 |
|
97 |
+
def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0):
|
98 |
is_titlecase = 1 if w[0].isupper() else 0
|
99 |
is_allcaps = 1 if w.isupper() else 0
|
100 |
is_sw = 1 if w.lower() in SW else 0
|
101 |
is_punct = 1 if w in PUNCT else 0
|
102 |
+
# is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
|
103 |
is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
|
104 |
# is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
|
105 |
# is_start_of_sentence = 1 if scaled_position == 0 else 0
|
|
|
113 |
for i, token in enumerate(tokens):
|
114 |
prev_tag = tags[i - 1] if i > 0 else 0
|
115 |
next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
|
116 |
+
x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag)
|
117 |
y = 1 if tags[i] > 0 else 0
|
118 |
features.append(x)
|
119 |
labels.append(y)
|
|
|
140 |
tokens = word_tokenize(sentence)
|
141 |
features = []
|
142 |
|
143 |
+
raw_features = [self.vectorize(token, i / len(tokens)) for i, token in enumerate(tokens)]
|
144 |
raw_features = np.array(raw_features, dtype=np.float32)
|
145 |
scaled_features = self.scaler.transform(raw_features)
|
146 |
y_pred = self.model.predict(scaled_features)
|
|
|
149 |
prev_tag = y_pred[i - 1] if i > 0 else 0
|
150 |
next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
|
151 |
|
152 |
+
feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag)
|
153 |
features.append(feature_with_context)
|
154 |
|
155 |
features = np.array(features, dtype=np.float32)
|