word_embeddings / app_glove.py
felipekitamura's picture
Rename app.py to app_glove.py
1149639 verified
raw
history blame
3 kB
import gensim.downloader
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Word-vector model fetched through gensim's downloader; loaded once at import
# time and shared by every call to inference().
model = gensim.downloader.load("glove-wiki-gigaword-50")
# File path the plot image is saved to by plot_reduced_data() and returned by
# inference(). Every call writes to this same path.
cache = "/home/user/app/d.jpg"
# Function to reduce dimensions
def reduce_dimensions(data, method='PCA'):
    """Project ``data`` down to two components.

    Args:
        data: Array-like of samples to reduce; handed unchanged to the
            estimator's ``fit_transform``.
        method: ``'PCA'`` (default) or ``'TSNE'``.

    Returns:
        Whatever the selected estimator's ``fit_transform`` returns for
        ``n_components=2``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # The estimator is deliberately NOT called ``model``: that name is the
    # module-level word-embedding model and shadowing it invites mistakes.
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=3)
    else:
        # Previously an unknown method fell through and crashed with an
        # UnboundLocalError on the return line; fail with a clear message.
        raise ValueError(
            f"Unsupported reduction method {method!r}; expected 'PCA' or 'TSNE'."
        )
    return reducer.fit_transform(data)
# Plotting function
def plot_reduced_data(reduced_data, labels, title):
    """Draw a labelled 2-D scatter plot with two arrows and save it to ``cache``.

    Args:
        reduced_data: 2-D array indexed as ``[row, column]``; columns 0 and 1
            are used as the x and y coordinates. At least three rows are
            required because the arrows index rows 0, 1, 2 and -1.
        labels: Iterable of strings, one per row, in row order.
        title: Title shown above the plot.

    Returns:
        None. The image is written to the module-level ``cache`` path.
    """
    # Use an explicit figure/axes pair so the figure can be closed afterwards.
    # Figures created through pyplot stay registered until closed, so without
    # the close() every call would leave another figure alive in memory.
    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        ax.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
        for i, label in enumerate(labels):
            ax.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
        ax.set_title(title)

        # Two green arrows: row 0 -> row 1, and row 2 -> last row.
        # As called by inference() in this file, those rows hold word1, word2,
        # word3 and the composed vector respectively.
        for start_row, end_row in ((0, 1), (2, -1)):
            start_point = (reduced_data[start_row, 0], reduced_data[start_row, 1])
            end_point = (reduced_data[end_row, 0], reduced_data[end_row, 1])
            ax.annotate('', xy=end_point, xytext=start_point,
                        arrowprops=dict(arrowstyle="->", color='green', lw=3))

        ax.set_xlabel('Component 1')
        ax.set_ylabel('Component 2')
        ax.grid(True)
        fig.savefig(cache)
    finally:
        plt.close(fig)
# Text passed as the `description` argument of the Gradio interface below.
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina
The output is Word3 + (Word2 - Word1)
Credits:
* Gensim
* Glove
"""
# Text inputs for the three words; passed, in this order, as `inputs` to
# gr.Interface and therefore as (word1, word2, word3) to inference().
Word1 = gr.Textbox()
Word2 = gr.Textbox()
Word3 = gr.Textbox()
# NOTE(review): `label` is not referenced anywhere else in the visible code —
# confirm whether it is still needed before removing it.
label = gr.Label(show_label=True, label="Word4")
# Image component used as the interface output; it receives the file path
# returned by inference().
sp = gr.Image()
def inference(word1, word2, word3):
    """Compute ``word3 + (word2 - word1)``, plot it with its neighbours, return the image path.

    The three input words, the four nearest neighbours of the composed vector
    and the composed vector itself are projected to 2-D with PCA and drawn by
    ``plot_reduced_data``.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word the difference ``word2 - word1`` is applied to.

    Returns:
        The module-level ``cache`` path where the plot image was saved.

    Raises:
        gr.Error: If any word (after trimming, with a lowercase fallback) is
            not in the embedding vocabulary. Gradio shows the message in the UI
            instead of an opaque ``KeyError``.
    """
    # Normalise user input: trim whitespace, and only if the word as typed is
    # unknown fall back to its lowercase form, so any lookup that worked
    # before still resolves to the same key.
    cleaned = []
    for raw in (word1, word2, word3):
        word = (raw or "").strip()
        if word not in model:
            word = word.lower()
        cleaned.append(word)
    word1, word2, word3 = cleaned

    unknown = [word for word in cleaned if word not in model]
    if unknown:
        raise gr.Error(
            "Word(s) not found in the vocabulary: "
            + ", ".join(repr(word) for word in unknown)
        )

    transform = model[word3] + model[word2] - model[word1]
    output = model.similar_by_vector(transform)
    print(output)

    # Insertion order matters: plot_reduced_data draws its arrows between
    # rows 0, 1, 2 and the last row. A neighbour that repeats an earlier word
    # collapses into the existing dict entry rather than adding a row.
    word_list = [word1, word2, word3]
    word_list.extend(neighbour for neighbour, _score in output[:4])
    words = {key: model[key] for key in word_list}
    words[word3 + " + (" + word2 + " - " + word1 + ")"] = transform

    data = np.stack(list(words.values()))
    print(data.shape)
    labels = list(words)
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    print(reduced_data_pca.shape)
    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
    return cache
# Example rows offered in the UI, each ordered as [Word1, Word2, Word3].
examples = [["woman", "man", third] for third in ("aunt", "girl", "granddaughter")]

# Assemble the app: three text inputs go to inference(), whose returned image
# path is shown in the image output. Then start serving.
iface = gr.Interface(
    inference,
    [Word1, Word2, Word3],
    sp,
    examples=examples,
    description=description,
)
iface.launch()