Aryan J Chugh committed on
Commit
e89c31d
1 Parent(s): e49e08c

Initial commit

Browse files
Files changed (4) hide show
  1. app.py +134 -0
  2. pca_labels.npy +3 -0
  3. pca_vectors.npy +3 -0
  4. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gradio as gr
3
+ import gensim.downloader
4
+ import matplotlib.pyplot as plt
5
+ import plotly.graph_objs as go
6
+ import seaborn as sns
7
+
8
+ glove_vectors = gensim.downloader.load('glove-twitter-25')
9
+
10
+ labels = np.load('pca_labels.npy')
11
+ vectors = np.load('pca_vectors.npy')
12
+
13
+
14
+ with gr.Blocks() as demo:
15
+ gr.Markdown("""
16
+ # ![ai bloq logo https://www.aibloq.com](https://aibloq.com/_next/image?url=%2FLogo.png&w=48&q=75) [Ai Bloq](https://www.aibloq.com)
17
+ # How machines understand natural language
18
+ ## This NLP example is a part of Ai Bloq's Blog: **[How do machines understand text via Natural Language Processing (NLP)](https://www.aibloq.com)**
19
+ ### For more such content and illustrative explanations visit [Ai Bloq's Resources](https://www.aibloq.com) and explore different machine learning and deep learning concepts
20
+ ## **To create industry level artificially intelligent services and application sign up for a free demo account at [Ai Bloq](https://www.aibloq.com) :- A No-Code data science platform with industry level auto scaling capabilities**
21
+ """)
22
+ with gr.Tab("Visualize words"):
23
+ sentence_input_viz = gr.Textbox(label="Enter a sentence")
24
+ pca_output = gr.Plot()
25
+
26
+ generate_pca_button = gr.Button("Visualize words in 3D space")
27
+ with gr.Tab("View word vectors"):
28
+ sentence_input = gr.Textbox(label="Enter a sentence")
29
+ vectors_output = gr.Plot()
30
+
31
+ generate_vectors_button = gr.Button("Generate vectors")
32
+
33
+ with gr.Accordion("Words not present in the vocabulary"):
34
+ excl_words_md = gr.Markdown("Enter a sentence and generate vectors first")
35
+
36
def break_words(input_sentence):
    """Split a sentence into words and partition them by vocabulary membership.

    Args:
        input_sentence: Raw text entered by the user.

    Returns:
        A ``(final_words, excluded_words)`` tuple of lists: the words found in
        ``glove_vectors.key_to_index`` and the words that are not, each kept
        in input order.

    Raises:
        gr.Error: If the input contains no words, contains more than 15
            words, or none of its words is present in the vocabulary.
    """
    # split() without an argument splits on any run of whitespace, so repeated
    # spaces, tabs or newlines never produce empty "words" that would count
    # toward the length limit or be reported as missing from the vocabulary.
    words = input_sentence.split()

    if not words:
        raise gr.Error('Cannot process input without any words')

    if len(words) > 15:
        raise gr.Error("Maximum sentence length is 15 words")

    final_words = []
    excluded_words = []

    for word in words:
        if word in glove_vectors.key_to_index:
            final_words.append(word)
        else:
            excluded_words.append(word)

    if not final_words:
        raise gr.Error("No word is present in the vocabulary, please try with another sentence")

    return final_words, excluded_words
60
+
61
def generate_vectors(input_sentence):
    """Draw an annotated heatmap of the embedding of each in-vocabulary word.

    Args:
        input_sentence: Raw text entered by the user.

    Returns:
        A two-item list: the Matplotlib figure (one heatmap row per word,
        labelled with the word on the y axis) and a string listing the words
        that were not found in the vocabulary.

    Raises:
        gr.Error: Propagated from ``break_words`` when the input is rejected.
    """
    final_words, excluded_words = break_words(input_sentence)

    # The figure height scales with the number of rows (words) to draw.
    fig, ax = plt.subplots(1, figsize=(40, len(final_words) * 2))

    # One row per word, 25 columns per row.
    word_matrix = np.array(
        [glove_vectors[word] for word in final_words]
    ).reshape(-1, 25)

    sns.heatmap(word_matrix, annot=True, ax=ax)
    # The x axis has no labels or ticks; the y axis shows the words.
    ax.tick_params(bottom=False)
    ax.set(xticklabels=[])
    ax.set(yticklabels=final_words)
    ax.tick_params(axis='y', labelsize=20)

    # pyplot keeps every figure it creates registered until it is closed.
    # Closing it here stops figures from accumulating across requests; the
    # Figure object itself is still returned to the caller.
    plt.close(fig)

    if excluded_words:
        excluded_text = ", ".join(excluded_words)
    else:
        excluded_text = "None, all words are present in the vocabulary"

    return [fig, excluded_text]
83
+
84
def generate_pca_plot(input_sentence):
    """Build a 3D scatter plot of the precomputed coordinates of each word.

    Each plotted word is drawn as a labelled marker, plus a line from the
    origin to that marker.

    Args:
        input_sentence: Raw text entered by the user.

    Returns:
        A two-item list: the Plotly figure and a string listing the words
        that could not be plotted.

    Raises:
        gr.Error: Propagated from ``break_words`` when the input is rejected.
    """
    final_words, excluded_words = break_words(input_sentence)

    xs, ys, zs, plotted_words = [], [], [], []

    for word in final_words:
        matches = np.where(labels == word)[0]
        if len(matches) == 0:
            # No row for this word in ``labels``: report it with the other
            # excluded words instead of failing with a bare IndexError.
            excluded_words.append(word)
            continue

        point = vectors[matches[0]]
        xs.append(point[0])
        ys.append(point[1])
        zs.append(point[2])
        plotted_words.append(word)

    # First trace: the labelled markers themselves.
    traces = [
        go.Scatter3d(
            x=xs,
            y=ys,
            z=zs,
            mode='markers+text',
            text=plotted_words,
            showlegend=False,
        )
    ]

    # One extra trace per word: a line segment from the origin to its point.
    traces.extend(
        go.Scatter3d(
            x=[0, x],
            y=[0, y],
            z=[0, z],
            mode='lines',
            showlegend=False,
        )
        for x, y, z in zip(xs, ys, zs)
    )

    if excluded_words:
        excluded_text = ", ".join(excluded_words)
    else:
        excluded_text = "None, all words are present in the vocabulary"

    return [go.Figure(data=traces), excluded_text]
129
+
130
+ generate_vectors_button.click(generate_vectors, inputs=sentence_input, outputs=[vectors_output, excl_words_md])
131
+ generate_pca_button.click(generate_pca_plot, inputs=sentence_input_viz, outputs=[pca_output, excl_words_md])
132
+
133
+ if __name__ == "__main__":
134
+ demo.launch()
pca_labels.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75b9d45b968f0c817965023f872cd55a97b272061f46a3c35afaf3a4e68e815c
3
+ size 668367968
pca_vectors.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d012d420c713c67862ca4ad34ecc025810cc8025922e02ed106d4fb00892c3c
3
+ size 14322296
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy
2
+ gensim
3
+ matplotlib
4
+ plotly
5
+ seaborn