felipekitamura committed: Create app.py
app.py ADDED
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Pickled embedding model; the loaded object must support word indexing
# (model[word]) and similar_by_vector(), as a gensim KeyedVectors does.
model = np.load('gpt2-1k-words.npy', allow_pickle=True).item()

# Path where the plot is written and from which Gradio serves the output image
cache = "/home/user/app/d.jpg"

# Reduce the embedding matrix to 2 components with PCA or t-SNE
def reduce_dimensions(data, method='PCA'):
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=3)
    return reducer.fit_transform(data)

# Scatter-plot the 2-D points, label each word, and draw the analogy arrows
def plot_reduced_data(reduced_data, labels, title):
    plt.figure(figsize=(10, 8))
    plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
    for i, label in enumerate(labels):
        plt.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
    plt.title(title)

    # Arrow 1: from Word1 to Word2
    start_point = (reduced_data[0, 0], reduced_data[0, 1])   # starting point of the arrow
    end_point = (reduced_data[1, 0], reduced_data[1, 1])     # ending point of the arrow
    plt.annotate('', xy=end_point, xytext=start_point,
                 arrowprops=dict(arrowstyle="->", color='green', lw=3))

    # Arrow 2: from Word3 to the analogy vector Word3 + (Word2 - Word1)
    start_point = (reduced_data[2, 0], reduced_data[2, 1])   # starting point of the arrow
    end_point = (reduced_data[-1, 0], reduced_data[-1, 1])   # ending point of the arrow
    plt.annotate('', xy=end_point, xytext=start_point,
                 arrowprops=dict(arrowstyle="->", color='green', lw=3))

    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)
    plt.savefig(cache)
    plt.close()  # free the figure so repeated calls do not accumulate memory

description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is a plot of Word3 + (Word2 - Word1) and its nearest neighbours in the embedding space.
Credits:
* Gensim
* GloVe
"""

Word1 = gr.Textbox()
Word2 = gr.Textbox()
Word3 = gr.Textbox()
label = gr.Label(show_label=True, label="Word4")  # not wired into the interface below
sp = gr.Image()


def inference(word1, word2, word3):
    # Analogy arithmetic: word3 + (word2 - word1)
    transform = model[word3] + model[word2] - model[word1]
    # Nearest neighbours of the resulting vector, as (word, score) pairs
    output = model.similar_by_vector(transform)
    print(output)
    word_list = [word1, word2, word3]
    word_list.extend([x for x, _ in output[:4]])
    words = {key: model[key] for key in word_list}
    words[word3 + " + (" + word2 + " - " + word1 + ")"] = transform
    # Stack all vectors into a single (n_words, dim) matrix
    data = np.concatenate([x[np.newaxis, :] for x in words.values()], axis=0)
    print(data.shape)
    labels = words.keys()
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache


examples = [
    ["woman", "man", "aunt"],
    ["woman", "man", "girl"],
    ["woman", "man", "granddaughter"],
]

iface = gr.Interface(
    fn=inference,
    inputs=[Word1, Word2, Word3],
    outputs=sp,
    description=description,
    examples=examples,
)

iface.launch()
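
The call to model.similar_by_vector(transform) assumes the pickled object in gpt2-1k-words.npy behaves like a gensim keyed-vector model. If the file instead holds a plain dict mapping words to NumPy vectors, a minimal cosine-similarity stand-in along the lines below could provide the same interface; the helper name and the topn default are assumptions, not part of the original app.

# Hypothetical helper, only needed if the loaded object is a plain
# dict of word -> np.ndarray rather than a gensim KeyedVectors.
import numpy as np

def similar_by_vector(vectors, query, topn=10):
    """Return the topn (word, cosine similarity) pairs closest to `query`."""
    words = list(vectors.keys())
    mat = np.stack([vectors[w] for w in words])   # shape (V, D)
    sims = mat @ query / (np.linalg.norm(mat, axis=1) * np.linalg.norm(query) + 1e-12)
    top = np.argsort(-sims)[:topn]
    return [(words[i], float(sims[i])) for i in top]

With that helper, the line in inference would become output = similar_by_vector(model, transform); everything downstream already treats output as a list of (word, score) pairs.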
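
The commit does not show how gpt2-1k-words.npy was produced. Purely as a sketch (the word list, the choice of GPT-2 input embeddings, and the averaging of sub-word embeddings are all assumptions), a word-to-vector dict of this shape could be built with the transformers library and saved with NumPy:

# Sketch only: builds a small word -> embedding dict and saves it with NumPy.
import numpy as np
from transformers import GPT2Tokenizer, GPT2Model

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2 = GPT2Model.from_pretrained("gpt2")
embeddings = gpt2.wte.weight.detach()          # (vocab_size, 768) input embeddings

words = ["woman", "man", "aunt", "uncle", "girl", "boy"]   # stand-in for the 1k-word list
vectors = {}
for w in words:
    ids = tokenizer(" " + w)["input_ids"]      # leading space = GPT-2 word-initial token
    vectors[w] = embeddings[ids].mean(dim=0).numpy()        # average sub-word embeddings

np.save("gpt2-1k-words.npy", vectors)          # later: np.load(..., allow_pickle=True).item()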