# word_embeddings / app.py
import gensim.downloader
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
# Pre-trained 50-dimensional GloVe vectors (Wikipedia + Gigaword),
# downloaded by gensim.downloader on first run.
model = gensim.downloader.load("glove-wiki-gigaword-50")
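# Illustrative sanity check (assumes the standard gensim KeyedVectors API);
# uncomment to confirm the vectors loaded correctly:
# print(model.most_similar("king", topn=3))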
# Reduce high-dimensional embeddings to 2-D for plotting.
def reduce_dimensions(data, method='PCA'):
    if method == 'PCA':
        reducer = PCA(n_components=2)
    elif method == 'TSNE':
        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=3)
    else:
        raise ValueError(f"Unknown method: {method!r}")
    return reducer.fit_transform(data)
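# Quick shape check for reduce_dimensions (illustrative, random data): both
# methods map (n, 50) -> (n, 2). Note TSNE requires n_samples > perplexity,
# so use at least 4 rows with perplexity=3.
# _demo = np.random.default_rng(0).normal(size=(5, 50))
# assert reduce_dimensions(_demo, method='PCA').shape == (5, 2)
# assert reduce_dimensions(_demo, method='TSNE').shape == (5, 2)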
description = """
### Word Embedding Demo App
Universidade Federal de São Paulo - Escola Paulista de Medicina

The output is **Word3 + (Word2 - Word1)**.

Credits:
* Gensim
* GloVe
"""
Word1 = gr.Textbox(label="Word1")
Word2 = gr.Textbox(label="Word2")
Word3 = gr.Textbox(label="Word3")
label = gr.Label(show_label=True, label="Word4")  # defined but never wired into the interface
sp = gr.ScatterPlot(x="x", y="y", color="color", label="2-D PCA projection")
# Given Word1, Word2, Word3, find the nearest neighbours of
# vec(Word3) + vec(Word2) - vec(Word1) and return a 2-D PCA projection
# of the input words plus the top matches for plotting.
def inference(word1, word2, word3):
    output = model.similar_by_vector(model[word3] + model[word2] - model[word1])
    word_list = [word1, word2, word3]
    word_list.extend([word for word, _ in output[:4]])
    # The dict keeps one vector per word and drops duplicate matches.
    words = {key: model[key] for key in word_list}
    data = np.stack(list(words.values()))
    labels = list(words.keys())
    reduced_data_pca = reduce_dimensions(data, method='PCA')
    df = pd.DataFrame({
        "x": reduced_data_pca[:, 0],
        "y": reduced_data_pca[:, 1],
        "color": labels,
    })
    return df
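# Optional hardening (a sketch, not part of the original app): the GloVe
# vocabulary is fixed, so an out-of-vocabulary input makes model[word] raise
# KeyError inside inference(). A guard like this hypothetical helper could be
# called first; `model.key_to_index` is gensim's standard vocabulary mapping.
def in_vocab(*words):
    return all(w in model.key_to_index for w in words)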
examples = [
["woman", "man", "aunt"],
["woman", "man", "girl"],
["woman", "man", "granddaughter"],
]
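# Each example follows the Word3 + (Word2 - Word1) pattern; the first asks
# for aunt + (man - woman), which should land near "uncle".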
iface = gr.Interface(
    fn=inference,
    inputs=[Word1, Word2, Word3],
    outputs=sp,
    description=description,
    examples=examples,
)

iface.launch()