"""Gradio demo of word-analogy arithmetic on pre-trained GloVe embeddings.

Given three words, the app computes ``Word3 + (Word2 - Word1)`` in embedding
space, looks up the nearest vocabulary entries, and shows the input words
together with those neighbours on a 2-D PCA scatter plot.
"""
import logging

import gensim.downloader
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

logger = logging.getLogger(__name__)

# Embedding model shared by every request; loaded once at import time.
model = gensim.downloader.load("glove-wiki-gigaword-50")


def reduce_dimensions(data, method="PCA"):
    """Project the rows of *data* onto two dimensions.

    Args:
        data: 2-D array-like with one sample per row.
        method: ``"PCA"`` or ``"TSNE"``.

    Returns:
        Whatever the chosen estimator's ``fit_transform`` returns for *data*
        (both estimators are configured with ``n_components=2``).

    Raises:
        ValueError: If *method* is not one of the supported names.
    """
    # The local is deliberately NOT called ``model`` so that it cannot be
    # confused with (or shadow) the module-level embedding model.
    if method == "PCA":
        reducer = PCA(n_components=2)
    elif method == "TSNE":
        reducer = TSNE(
            n_components=2,
            learning_rate="auto",
            init="random",
            perplexity=3,
        )
    else:
        raise ValueError(
            f"Unknown reduction method {method!r}; expected 'PCA' or 'TSNE'"
        )
    return reducer.fit_transform(data)


description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina

The output is Word3 + (Word2 - Word1)

Credits:
* Gensim
* Glove
"""

Word1 = gr.Textbox()
Word2 = gr.Textbox()
Word3 = gr.Textbox()
label = gr.Label(show_label=True, label="Word4")
sp = gr.ScatterPlot(x="x", y="y", color="color", label="label")


def _lookup(word):
    """Return ``(cleaned_word, vector)`` or raise a user-facing ``gr.Error``."""
    cleaned = (word or "").strip()
    try:
        return cleaned, model[cleaned]
    except KeyError:
        # Surface a readable message in the UI instead of a bare KeyError.
        raise gr.Error(
            f"Word not found in the embedding vocabulary: {cleaned!r}"
        ) from None


def inference(word1, word2, word3):
    """Compute ``word3 + (word2 - word1)`` and return 2-D plot coordinates.

    Args:
        word1: Word whose vector is subtracted.
        word2: Word whose vector is added.
        word3: Word the offset ``word2 - word1`` is applied to.

    Returns:
        A ``pandas.DataFrame`` with columns ``x`` and ``y`` (PCA coordinates)
        and ``color`` (the word each point represents). Rows cover the three
        input words followed by the nearest neighbours of the result vector;
        a word that appears more than once is plotted only once.

    Raises:
        gr.Error: If any input word is missing from the embedding vocabulary.
    """
    (w1, v1), (w2, v2), (w3, v3) = (
        _lookup(word) for word in (word1, word2, word3)
    )

    # Only the four closest entries are plotted, so only ask for four.
    neighbours = model.similar_by_vector(v3 + v2 - v1, topn=4)
    logger.debug("nearest neighbours: %s", neighbours)

    # A dict keeps first-seen order and drops duplicates: the neighbours
    # frequently include one of the input words themselves.
    words = {w1: v1, w2: v2, w3: v3}
    for neighbour, _score in neighbours:
        words.setdefault(neighbour, model[neighbour])

    labels = list(words)
    data = np.stack(list(words.values()))
    coords = reduce_dimensions(data, method="PCA")
    logger.debug("embedded %s -> reduced %s", data.shape, coords.shape)

    return pd.DataFrame(
        {
            "x": coords[:, 0],
            "y": coords[:, 1],
            "color": labels,
        }
    )


examples = [
    ["woman", "man", "aunt"],
    ["woman", "man", "girl"],
    ["woman", "man", "granddaughter"],
]

iface = gr.Interface(
    fn=inference,
    inputs=[Word1, Word2, Word3],
    outputs=sp,
    description=description,
    examples=examples,
)

iface.launch()