Spaces:

hackathon-pln-es
/

Paraphrase-Bertin

Runtime error

App Files Files Community

Paraphrase-Bertin / app.py

Lautaro

Adding App

15d81e0 about 2 years ago

raw history blame contribute delete

No virus

1.94 kB

	import streamlit as st
	import pandas as pd
	import numpy as np

	from sentence_transformers.util import cos_sim
	from sentence_transformers import SentenceTransformer
	from bokeh.plotting import figure, output_notebook, show, save
	from bokeh.io import output_file, show
	from bokeh.models import ColumnDataSource, HoverTool
	from sklearn.manifold import TSNE


	# Choose a cache decorator suited to a large model object.  The legacy
	# ``st.cache`` hashes the cached return value on every call to detect
	# mutation, which is slow and error-prone for a neural network; newer
	# Streamlit releases provide ``st.cache_resource`` for exactly this case.
	# Fall back to ``st.cache(allow_output_mutation=True)`` on older versions.
	_cache_model = getattr(st, "cache_resource", None)
	if _cache_model is None:
		_cache_model = st.cache(allow_output_mutation=True)


	@_cache_model
	def load_model():
		"""Load the Spanish paraphrase sentence-embedding model.

		The result is cached by Streamlit so the model is not rebuilt on every
		script rerun.

		Returns:
			The ``SentenceTransformer`` instance for
			``hackathon-pln-es/paraphrase-spanish-distilroberta``, after
			``eval()`` has been called on it.
		"""
		model = SentenceTransformer('hackathon-pln-es/paraphrase-spanish-distilroberta')
		# Only inference is performed by this app, so put the model in eval mode.
		model.eval()
		return model

	# Static page header: title, model description, usage hint and credits.
	st.title("Sentence Embedding for Spanish with Bertin")
	# The description is one Markdown paragraph; it is split across adjacent
	# string literals purely for readability (the rendered text is unchanged).
	st.write(
		"Sentence embedding for spanish trained according to instructions in the paper "
		"[Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation]"
		"(https://arxiv.org/pdf/2004.09813.pdf) and the "
		"[documentation](https://www.sbert.net/examples/training/multilingual/README.html) "
		"accompanying its companion python package. We have used the strongest available "
		"pretrained English Bi-Encoder "
		"([paraphrase-mpnet-base-v2](https://www.sbert.net/docs/pretrained_models.html#sentence-embedding-models)) "
		"as a teacher model, and the pretrained Spanish "
		"[BERTIN](https://huggingface.co/bertin-project/bertin-roberta-base-spanish) "
		"as the student model. Used for Sentence Textual Similarity. "
		"Based on the model hackathon-pln-es/paraphrase-spanish-distilroberta."
	)
	# NOTE(review): this text promises a graph of the embedding space, but no
	# plot is rendered in the code visible here -- confirm whether the plotting
	# step is missing or the sentence should be reworded.
	st.write("Introduce two sentences to see their cosine similarity and a graph showing them in the embedding space.")
	st.write("Authors: Anibal Pérez, Emilio Tomás Ariza, Lautaro Gesuelli Pinto y Mauricio Mazuecos.")

	# Free-text inputs for the two sentences to compare.
	sent1 = st.text_area('Enter sentence 1')
	sent2 = st.text_area('Enter sentence 2')

	# Nothing is computed until the user presses the button.
	if st.button('Compute similarity'):
		# Treat whitespace-only input as missing so that a blank box does not
		# get embedded and compared as if it were a real sentence.
		if sent1.strip() and sent2.strip():
			model = load_model()
			# Encode both sentences in a single call; one embedding per sentence.
			encodings = model.encode([sent1, sent2])
			# cos_sim yields a single-element tensor for one pair of vectors;
			# .item() extracts it as a plain Python float.
			sim = cos_sim(encodings[0], encodings[1]).item()
			st.text('Cosine Similarity: {0:.4f}'.format(sim))
		else:
			st.write('Missing a sentence')