Aryan J Chugh
committed on
Commit
•
e89c31d
1
Parent(s):
e49e08c
Initial commit
Browse files- app.py +134 -0
- pca_labels.npy +3 -0
- pca_vectors.npy +3 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import gradio as gr
|
3 |
+
import gensim.downloader
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.graph_objs as go
|
6 |
+
import seaborn as sns
|
7 |
+
|
8 |
+
# Pre-trained GloVe Twitter embeddings (25 values per word), obtained through
# gensim's downloader API; used below for vocabulary checks and the heatmap.
glove_vectors = gensim.downloader.load("glove-twitter-25")

# Precomputed projection data stored next to the app: `labels` holds the words
# and `vectors` the matching coordinates (the first three are plotted as x/y/z).
labels, vectors = np.load("pca_labels.npy"), np.load("pca_vectors.npy")
|
12 |
+
|
13 |
+
|
14 |
+
with gr.Blocks() as demo:
|
15 |
+
gr.Markdown("""
|
16 |
+
# ![ai bloq logo https://www.aibloq.com](https://aibloq.com/_next/image?url=%2FLogo.png&w=48&q=75) [Ai Bloq](https://www.aibloq.com)
|
17 |
+
# How machines understand natural language
|
18 |
+
## This NLP example is a part of Ai Bloq's Blog: **[How do machines understand text via Natural Language Processing (NLP)](https://www.aibloq.com)**
|
19 |
+
### For more such content and illustrative explanations visit [Ai Bloq's Resources](https://www.aibloq.com) and explore different machine learning and deep learning concepts
|
20 |
+
## **To create industry level artificially intelligent services and application sign up for a free demo account at [Ai Bloq](https://www.aibloq.com) :- A No-Code data science platform with industry level auto scaling capabilities**
|
21 |
+
""")
|
22 |
+
with gr.Tab("Visualize words"):
|
23 |
+
sentence_input_viz = gr.Textbox(label="Enter a sentence")
|
24 |
+
pca_output = gr.Plot()
|
25 |
+
|
26 |
+
generate_pca_button = gr.Button("Visualize words in 3D space")
|
27 |
+
with gr.Tab("View word vectors"):
|
28 |
+
sentence_input = gr.Textbox(label="Enter a sentence")
|
29 |
+
vectors_output = gr.Plot()
|
30 |
+
|
31 |
+
generate_vectors_button = gr.Button("Generate vectors")
|
32 |
+
|
33 |
+
with gr.Accordion("Words not present in the vocabulary"):
|
34 |
+
excl_words_md = gr.Markdown("Enter a sentence and generate vectors first")
|
35 |
+
|
36 |
+
def break_words(input_sentence):
    """Split a sentence into words and separate those known to the GloVe model.

    Args:
        input_sentence: Raw text typed by the user (``None`` is treated as
            empty input).

    Returns:
        A ``(final_words, excluded_words)`` tuple of lists: the words found in
        ``glove_vectors.key_to_index`` and the words that are not, each kept
        in input order.

    Raises:
        gr.Error: If the input contains no words, has more than 15 words, or
            none of its words is present in the vocabulary.
    """
    # split() with no argument splits on any run of whitespace, so repeated
    # spaces, tabs or newlines do not produce empty "words" that would count
    # towards the limit and show up in the excluded list.
    words = (input_sentence or "").split()

    if not words:
        raise gr.Error('Cannot process input without any words')

    if len(words) > 15:
        raise gr.Error("Maximum sentence length is 15 words")

    final_words = []
    excluded_words = []
    for word in words:
        if word in glove_vectors.key_to_index:
            final_words.append(word)
        else:
            excluded_words.append(word)

    if not final_words:
        raise gr.Error("No word is present in the vocabulary, please try with another sentence")

    return final_words, excluded_words
|
60 |
+
|
61 |
+
def generate_vectors(input_sentence):
    """Render the GloVe vectors of a sentence's words as an annotated heatmap.

    Args:
        input_sentence: Raw text typed by the user.

    Returns:
        A two-item list: the matplotlib figure (one heatmap row per
        in-vocabulary word) and a string naming the words that were not found
        in the vocabulary.

    Raises:
        gr.Error: Propagated from ``break_words`` for empty, too long or
            fully out-of-vocabulary input.
    """
    final_words, excluded_words = break_words(input_sentence)

    # Two inches of height per word keeps the annotated rows readable.
    fig, ax = plt.subplots(1, figsize=(40, len(final_words) * 2))

    try:
        word_matrix = np.array([glove_vectors[word] for word in final_words])

        sns.heatmap(word_matrix.reshape(-1, 25), annot=True, ax=ax)
        ax.tick_params(bottom=False)
        ax.set(xticklabels=[])
        ax.set(yticklabels=final_words)
        ax.tick_params(axis='y', labelsize=20)
    finally:
        # pyplot keeps every figure it creates in a global registry until it
        # is closed; without this a long-running server accumulates one
        # figure per request. The Figure object itself remains usable and is
        # still returned to the caller for rendering.
        plt.close(fig)

    if excluded_words:
        excluded_summary = ", ".join(excluded_words)
    else:
        excluded_summary = "None, all words are present in the vocabulary"

    return [fig, excluded_summary]
|
83 |
+
|
84 |
+
def generate_pca_plot(input_sentence):
    """Build a 3D plot of the sentence's words from the precomputed vectors.

    Each word is drawn as a labelled marker, plus a line from the origin to
    that marker.

    Args:
        input_sentence: Raw text typed by the user.

    Returns:
        A two-item list: the plotly figure and a string naming the words that
        could not be plotted.

    Raises:
        gr.Error: Propagated from ``break_words`` for empty, too long or
            fully out-of-vocabulary input, and raised here when none of the
            words has an entry in ``labels``.
    """
    final_words, excluded_words = break_words(input_sentence)

    plotted_words, xs, ys, zs = [], [], [], []
    for word in final_words:
        matches = np.where(labels == word)[0]
        if matches.size == 0:
            # The word is known to the GloVe model but has no precomputed
            # coordinates; report it instead of failing with an IndexError.
            excluded_words.append(word)
            continue

        coords = vectors[matches[0]]
        plotted_words.append(word)
        xs.append(coords[0])
        ys.append(coords[1])
        zs.append(coords[2])

    if not plotted_words:
        raise gr.Error("No word is present in the vocabulary, please try with another sentence")

    # First trace: the labelled points themselves.
    traces = [
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode='markers+text',
            text=plotted_words,
            showlegend=False,
        )
    ]

    # One extra trace per word: a line from the origin to the point.
    traces.extend(
        go.Scatter3d(
            x=[0, x],
            y=[0, y],
            z=[0, z],
            mode='lines',
            showlegend=False,
        )
        for x, y, z in zip(xs, ys, zs)
    )

    if excluded_words:
        excluded_summary = ", ".join(excluded_words)
    else:
        excluded_summary = "None, all words are present in the vocabulary"

    return [go.Figure(data=traces), excluded_summary]
|
129 |
+
|
130 |
+
generate_vectors_button.click(generate_vectors, inputs=sentence_input, outputs=[vectors_output, excl_words_md])
|
131 |
+
generate_pca_button.click(generate_pca_plot, inputs=sentence_input_viz, outputs=[pca_output, excl_words_md])
|
132 |
+
|
133 |
+
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
pca_labels.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75b9d45b968f0c817965023f872cd55a97b272061f46a3c35afaf3a4e68e815c
|
3 |
+
size 668367968
|
pca_vectors.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d012d420c713c67862ca4ad34ecc025810cc8025922e02ed106d4fb00892c3c
|
3 |
+
size 14322296
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy
|
2 |
+
gensim
|
3 |
+
matplotlib
|
4 |
+
plotly
|
5 |
+
seaborn
|