Spaces:
Running
Running
word embeddings
Browse files
app.py
CHANGED
@@ -36,7 +36,7 @@ def word_embedding_space_analysis(
|
|
36 |
S, V, D = torch.linalg.svd(matrix)
|
37 |
|
38 |
data = []
|
39 |
-
top =
|
40 |
select_words = 20
|
41 |
n_dim = 10
|
42 |
for _i in range(n_dim):
|
@@ -54,15 +54,16 @@ def word_embedding_space_analysis(
|
|
54 |
word = word[1:]
|
55 |
if word.lower() in nltk.corpus.words.words():
|
56 |
output.append(word)
|
57 |
-
return output
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
|
|
63 |
return pd.DataFrame(
|
64 |
data,
|
65 |
-
columns=["
|
66 |
index=[f"Dim#{_i}" for _i in range(n_dim)],
|
67 |
)
|
68 |
|
@@ -196,7 +197,7 @@ def main():
|
|
196 |
# Analysing the sentence
|
197 |
st.divider()
|
198 |
st.divider()
|
199 |
-
st.subheader("LM-Steer Converts
|
200 |
'''
|
201 |
LM-Steer also serves as a probe for analyzing the text. It can be used to
|
202 |
analyze the sentiment and detoxification of the text. Now, we proceed and
|
@@ -267,14 +268,8 @@ def main():
|
|
267 |
embeddings: what word dimensions contribute to or contrast with a specific
|
268 |
style. This analysis can be used to understand the word embedding space
|
269 |
and how it steers the model's generation.
|
270 |
-
|
271 |
-
Note that due to the bidirectional nature of the embedding spaces, in each
|
272 |
-
dimension, sometimes only one side of the word embeddings contributes
|
273 |
-
(has an impact on the style), while the other side (resulting in negative
|
274 |
-
logits) has a negligible impact on the style. The table below shows both
|
275 |
-
sides of the word embeddings in each dimension.
|
276 |
'''
|
277 |
-
for dimension in ["
|
278 |
f'##### {dimension} Word Dimensions'
|
279 |
dim = 2 if dimension == "Sentiment" else 0
|
280 |
analysis_result = word_embedding_space_analysis(
|
|
|
36 |
S, V, D = torch.linalg.svd(matrix)
|
37 |
|
38 |
data = []
|
39 |
+
top = 50
|
40 |
select_words = 20
|
41 |
n_dim = 10
|
42 |
for _i in range(n_dim):
|
|
|
54 |
word = word[1:]
|
55 |
if word.lower() in nltk.corpus.words.words():
|
56 |
output.append(word)
|
57 |
+
return output
|
58 |
|
59 |
+
left_tokens = filter_words(left_tokens)
|
60 |
+
right_tokens = filter_words(right_tokens)
|
61 |
+
if len(left_tokens) < len(right_tokens):
|
62 |
+
left_tokens = right_tokens
|
63 |
+
data.append(", ".join(left_tokens[:select_words]))
|
64 |
return pd.DataFrame(
|
65 |
data,
|
66 |
+
columns=["Words Contributing to the Style"],
|
67 |
index=[f"Dim#{_i}" for _i in range(n_dim)],
|
68 |
)
|
69 |
|
|
|
197 |
# Analysing the sentence
|
198 |
st.divider()
|
199 |
st.divider()
|
200 |
+
st.subheader("LM-Steer Converts Any LM Into A Text Analyzer")
|
201 |
'''
|
202 |
LM-Steer also serves as a probe for analyzing the text. It can be used to
|
203 |
analyze the sentiment and detoxification of the text. Now, we proceed and
|
|
|
268 |
embeddings: what word dimensions contribute to or contrast with a specific
|
269 |
style. This analysis can be used to understand the word embedding space
|
270 |
and how it steers the model's generation.
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
'''
|
272 |
+
for dimension in ["Detoxification", "Sentiment"]:
|
273 |
f'##### {dimension} Word Dimensions'
|
274 |
dim = 2 if dimension == "Sentiment" else 0
|
275 |
analysis_result = word_embedding_space_analysis(
|