Spaces:

felipekitamura
/

word_embeddings

Sleeping

App Files Files Community

word_embeddings / app_glove.py

felipekitamura

Rename app.py to app_glove.py

1149639 verified 9 months ago

raw

history blame

3 kB

	import gensim.downloader
	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	from sklearn.decomposition import PCA
	from sklearn.manifold import TSNE
	# Pretrained word vectors fetched through gensim's downloader. `inference`
	# indexes this object by word and calls `similar_by_vector` on it.
	# NOTE(review): the "-50" suffix presumably means 50-dimensional vectors — confirm.
	model = gensim.downloader.load("glove-wiki-gigaword-50")

	# Image path that `plot_reduced_data` saves the figure to and that `inference`
	# returns. NOTE(review): one fixed path is shared by every request, so
	# concurrent requests may overwrite each other's plot.
	cache = "/home/user/app/d.jpg"

	# Function to reduce dimensions
	def reduce_dimensions(data, method='PCA'):
	    """Project the rows of ``data`` onto two components.

	    Args:
	        data: Samples to reduce, one sample per row; passed unchanged to the
	            reducer's ``fit_transform``.
	        method: ``'PCA'`` (default) or ``'TSNE'``.

	    Returns:
	        Whatever ``fit_transform`` returns for the chosen reducer (configured
	        with ``n_components=2``).

	    Raises:
	        ValueError: If ``method`` is not ``'PCA'`` or ``'TSNE'``.
	    """
	    # Named `reducer` (not `model`) so the module-level embedding model is
	    # not shadowed inside this function.
	    if method == 'PCA':
	        reducer = PCA(n_components=2)
	    elif method == 'TSNE':
	        # NOTE(review): perplexity is fixed at 3; scikit-learn expects it to
	        # be smaller than the number of samples — confirm for tiny inputs.
	        reducer = TSNE(n_components=2, learning_rate='auto', init='random', perplexity=3)
	    else:
	        # Previously an unknown method fell through and failed with an
	        # UnboundLocalError on the return line.
	        raise ValueError(
	            "Unsupported reduction method {!r}; expected 'PCA' or 'TSNE'".format(method)
	        )
	    return reducer.fit_transform(data)

	# Plotting function
	def plot_reduced_data(reduced_data, labels, title):
	    """Scatter-plot 2-D points with text labels and save the image to ``cache``.

	    Two green arrows are drawn: from row 0 to row 1, and from row 2 to the
	    last row. As called from ``inference`` these are Word1 -> Word2 and
	    Word3 -> the computed result vector.

	    Args:
	        reduced_data: Array indexed as ``[row, column]``; column 0 is used as
	            x and column 1 as y. Rows 0, 1, 2 and -1 must exist.
	        labels: Iterable of strings, one per row, in row order.
	        title: Title shown above the plot.

	    Returns:
	        None. The figure is written to the module-level ``cache`` path.
	    """
	    # Use an explicit figure/axes instead of pyplot's implicit "current
	    # figure" so the figure can be closed reliably afterwards.
	    fig, ax = plt.subplots(figsize=(10, 8))
	    try:
	        ax.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.6)
	        for i, label in enumerate(labels):
	            ax.annotate(" " + label, (reduced_data[i, 0], reduced_data[i, 1]), fontsize=18)
	        ax.set_title(title)

	        # (start_row, end_row) for each arrow; the arrow head is at end_row.
	        for start_row, end_row in ((0, 1), (2, -1)):
	            start_point = (reduced_data[start_row, 0], reduced_data[start_row, 1])
	            end_point = (reduced_data[end_row, 0], reduced_data[end_row, 1])
	            ax.annotate('', xy=end_point, xytext=start_point,
	                        arrowprops=dict(arrowstyle="->", color='green', lw=3))

	        ax.set_xlabel('Component 1')
	        ax.set_ylabel('Component 2')
	        ax.grid(True)
	        fig.savefig(cache)
	    finally:
	        # Without this every call leaves an open figure behind, which
	        # accumulates memory in a long-running server process.
	        plt.close(fig)

	# Text passed as `description=` to the Gradio interface. The formula line
	# mirrors the vector arithmetic performed in `inference`.
	description = """
	### Word Embedding Demo App
	Universidade Federal de São Paulo - Escola Paulista de Medicina

	The output is Word3 + (Word2 - Word1)

	Credits:
	* Gensim
	* Glove
	"""

	# Three free-text inputs, created in the order `inference` receives them.
	Word1, Word2, Word3 = (gr.Textbox() for _ in range(3))
	# Label component titled "Word4".
	label = gr.Label(label="Word4", show_label=True)
	# Image component used as the interface output.
	sp = gr.Image()


	def inference(word1, word2, word3):
	    """Compute ``word3 + (word2 - word1)`` in embedding space and plot it.

	    The three input words, up to four of the nearest vocabulary words to the
	    resulting vector, and the resulting vector itself are reduced to 2-D
	    with PCA and drawn by ``plot_reduced_data``.

	    Args:
	        word1: Word subtracted from the analogy.
	        word2: Word added to the analogy.
	        word3: Word the difference ``word2 - word1`` is applied to.

	    Returns:
	        The module-level ``cache`` path of the saved plot image.

	    Raises:
	        gr.Error: If a word is empty or not present in the embedding model,
	            so the UI shows a readable message instead of a raw KeyError.
	    """
	    # The glove-wiki-gigaword vectors are published as uncased, so normalise
	    # user input before lookup; `or ""` tolerates a missing value.
	    inputs = [(word or "").strip().lower() for word in (word1, word2, word3)]

	    vectors = []
	    for word in inputs:
	        if not word:
	            raise gr.Error("Please enter all three words.")
	        try:
	            vectors.append(model[word])
	        except KeyError:
	            raise gr.Error(f"'{word}' is not in the embedding vocabulary.") from None

	    word1, word2, word3 = inputs
	    vec1, vec2, vec3 = vectors
	    transform = vec3 + vec2 - vec1
	    neighbours = model.similar_by_vector(transform)

	    # Keep labels and vectors in ordered lists rather than a dict keyed by
	    # word: the plot's arrows rely on rows 0, 1, 2 being the three inputs
	    # and the last row being the result, and a dict would collapse repeated
	    # input words and shift those positions.
	    labels = list(inputs)
	    points = list(vectors)
	    for neighbour, _score in neighbours[:4]:
	        # Nearest neighbours frequently include the inputs themselves;
	        # plot each word only once.
	        if neighbour not in labels:
	            labels.append(neighbour)
	            points.append(model[neighbour])
	    labels.append(word3 + " + (" + word2 + " - " + word1 + ")")
	    points.append(transform)

	    data = np.stack(points)
	    reduced_data_pca = reduce_dimensions(data, method='PCA')
	    plot_reduced_data(reduced_data_pca, labels, 'PCA Results')
	    return cache

	# Example rows in input order (Word1, Word2, Word3); only the third word varies.
	examples = [["woman", "man", third] for third in ("aunt", "girl", "granddaughter")]

	# Feed the three textboxes to `inference`, show the image it returns in
	# `sp`, then start the app.
	iface = gr.Interface(
	    inference,
	    [Word1, Word2, Word3],
	    sp,
	    examples=examples,
	    description=description,
	)
	iface.launch()