Spaces:

aibloq-founder
/

how-machines-understand-natural-languages-via-NLP

Sleeping

App Files Files Community

Aryan J Chugh committed on May 4, 2023

Commit

e89c31d

•

1 Parent(s): e49e08c

Initial commit

Browse files

Files changed (4) hide show

app.py +134 -0
pca_labels.npy +3 -0
pca_vectors.npy +3 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import numpy as np
+import gradio as gr
+import gensim.downloader
+import matplotlib.pyplot as plt
+import plotly.graph_objs as go
+import seaborn as sns
+glove_vectors = gensim.downloader.load('glove-twitter-25')  # loaded once at import time via gensim's downloader; vectors are reshaped to 25 columns in generate_vectors
+labels = np.load('pca_labels.npy')  # array of words; generate_pca_plot searches it to find each word's row index
+vectors = np.load('pca_vectors.npy')  # indexed by position in `labels`; columns 0, 1, 2 are used as x, y, z (presumably PCA output, per the file name)
+with gr.Blocks() as demo:  # the layout and the event handlers are all declared inside this context
+    gr.Markdown("""
+# ![ai bloq logo https://www.aibloq.com](https://aibloq.com/_next/image?url=%2FLogo.png&w=48&q=75) [Ai Bloq](https://www.aibloq.com)
+# How machines understand natural language
+## This NLP example is a part of Ai Bloq's Blog: **[How do machines understand text via Natural Language Processing (NLP)](https://www.aibloq.com)**
+### For more such content and illustrative explanations visit [Ai Bloq's Resources](https://www.aibloq.com) and explore different machine learning and deep learning concepts
+## **To create industry level artificially intelligent services and application sign up for a free demo account at [Ai Bloq](https://www.aibloq.com) :- A No-Code data science platform with industry level auto scaling capabilities**
+    """)  # static header shown above the tabs
+    with gr.Tab("Visualize words"):  # first tab: 3D scatter of the entered words
+        sentence_input_viz = gr.Textbox(label="Enter a sentence")  # input passed to generate_pca_plot
+        pca_output = gr.Plot()  # receives the plotly figure returned by generate_pca_plot
+        generate_pca_button = gr.Button("Visualize words in 3D space")
+    with gr.Tab("View word vectors"):  # second tab: heatmap of the raw word vectors
+        sentence_input = gr.Textbox(label="Enter a sentence")  # input passed to generate_vectors
+        vectors_output = gr.Plot()  # receives the matplotlib figure returned by generate_vectors
+        generate_vectors_button = gr.Button("Generate vectors")
+    with gr.Accordion("Words not present in the vocabulary"):  # declared outside both tabs; both handlers write to it
+        excl_words_md = gr.Markdown("Enter a sentence and generate vectors first")  # placeholder until a handler runs
+    def break_words(input_sentence):
+        if len(input_sentence.strip()) == 0:
+            raise gr.Error('Cannot process input without any words')
+        words = input_sentence.strip().split(" ")
+        if len(words) > 15:
+            raise gr.Error("Maximum sentence length is 15 words")
+        final_words = []
+        excluded_words_state = []
+        for word in words:
+            if glove_vectors.key_to_index.get(word.strip(), None) == None:
+                excluded_words_state.append(word.strip())
+            else:
+                final_words.append(word.strip())
+        if len(final_words) == 0:
+            raise gr.Error("No word is present in the vocabulary, please try with another sentence")
+        return final_words, excluded_words_state
+    def generate_vectors(input_sentence):
+        final_words, excluded_words_state = break_words(input_sentence)
+        fig, axs = plt.subplots(1, figsize=(40, len(final_words)*2))
+        data = []
+        for word in final_words:
+            data.append(glove_vectors[word])
+        sns.heatmap(np.array(data).reshape(-1, 25), annot=True, ax=axs)
+        axs.tick_params(bottom=False)
+        axs.set(xticklabels=[])
+        axs.set(yticklabels=final_words)
+        axs.tick_params(axis='y', labelsize=20)
+        excluded_words_state = ", ".join(excluded_words_state) if len(excluded_words_state) != 0 else "None, all words are present in the vocabulary"
+        return [fig, excluded_words_state]
+    def generate_pca_plot(input_sentence):
+        final_words, excluded_words_state = break_words(input_sentence)
+        df_new = {
+            "x": [],
+            "y": [],
+            "z": [],
+            "label": []
+        }
+        for word in final_words:
+            word_index = np.where(labels == word)[0][0]
+            df_new["x"].append(vectors[word_index][0])
+            df_new["y"].append(vectors[word_index][1])
+            df_new["z"].append(vectors[word_index][2])
+            df_new["label"].append(word)
+        trace1 = go.Scatter3d(
+            x=df_new["x"],
+            y=df_new["y"],
+            z=df_new["z"],
+            mode='markers+text',
+            text=df_new['label'],
+            showlegend=False
+        )
+        traces = [trace1]
+        for i in range(len(df_new["x"])):
+            traces.append(
+                go.Scatter3d(
+            x=[0, df_new["x"][i]],
+            y=[0, df_new["y"][i]],
+            z=[0, df_new["z"][i]],
+            mode='lines',
+            showlegend=False,
+        )
+        )
+        excluded_words_state = ", ".join(excluded_words_state) if len(excluded_words_state) != 0 else "None, all words are present in the vocabulary"
+        return [go.Figure(data=traces), excluded_words_state]
+    generate_vectors_button.click(generate_vectors, inputs=sentence_input, outputs=[vectors_output, excl_words_md])  # heatmap tab: the returned figure goes to the plot, the excluded-words text to the accordion
+    generate_pca_button.click(generate_pca_plot, inputs=sentence_input_viz, outputs=[pca_output, excl_words_md])  # 3D tab: same two-output contract, sharing the excluded-words Markdown
+if __name__ == "__main__":  # start the app only when this file is run as a script
+    demo.launch()

pca_labels.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75b9d45b968f0c817965023f872cd55a97b272061f46a3c35afaf3a4e68e815c
+size 668367968

pca_vectors.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d012d420c713c67862ca4ad34ecc025810cc8025922e02ed106d4fb00892c3c
+size 14322296

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+numpy
+gensim
+matplotlib
+plotly
+seaborn