Spaces:

flax-community
/

bertin

Runtime error

App Files Files Community

bertin / app.py

mariagrandury

Initial commit

3954682 almost 3 years ago

raw history blame

No virus

4.07 kB

	import random
	from mtranslate import translate
	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForMaskedLM, pipeline


	LOGO = "https://raw.githubusercontent.com/nlp-en-es/assets/main/logo.png"

	MODELS = {
	"RoBERTa Base": {
	"url": "https://api-inference.huggingface.co/models/bertin-project/bertin-roberta-base-spanish"
	},
	"RoBERTa Base Gaussian": {
	"url": "https://api-inference.huggingface.co/models/bertin-project/bertin-base-gaussian"
	},
	"RoBERTa Base Random": {
	"url": "https://api-inference.huggingface.co/models/bertin-project/bertin-base-random"
	},
	"RoBERTa Base Stepwise": {
	"url": "https://api-inference.huggingface.co/models/bertin-project/bertin-base-stepwise"
	},
	"RoBERTa Base Gaussian Experiment": {
	"url": "https://api-inference.huggingface.co/models/bertin-project/bertin-base-gaussian-exp-512seqlen"
	},
	"RoBERTa Base Random Experiment": {
	"url": "https://api-inference.huggingface.co/models/bertin-project/bertin-base-random-exp-512seqlen"
	}
	}

	PROMPT_LIST = [
	"Fui a la librería a comprar un <mask>.",
	"¡Qué buen <mask> hace hoy!",
	"Hoy empiezan las vacaciones, vamos a la <mask>.",
	"Mi color favorito es el <mask>.",
	"Voy a <mask>, estoy muy cansada.",
	"Mañana vienen mis amigos de <mask>.",
	"¿Te apetece venir a <mask> conmigo?",
	"En verano hace mucho <mask>.",
	"En el bosque había <mask>."
	]


	@st.cache(show_spinner=False, persist=True)
	def load_model(masked_text, model_name):
	model = AutoModelForMaskedLM.from_pretrained(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	nlp = pipeline("fill-mask", model=model, tokenizer=tokenizer)
	result = nlp(masked_text)
	return result


	# Page
	st.set_page_config(page_title="BERTIN Demo", page_icon=LOGO)
	st.title("BERTIN")

	#Sidebar
	st.sidebar.image(LOGO)

	# Body
	st.markdown(
	"""
	BERTIN is a series of BERT-based models for Spanish.
	The models are trained with Flax and using TPUs sponsored by Google since this is part of the
	[Flax/Jax Community Week](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104)
	organised by HuggingFace.
	"""
	)

	model_name = st.selectbox("Model",(MODELS.keys()))

	prompt = st.selectbox("Prompt", ["Random", "Custom"])
	if prompt == "Custom":
	prompt_box = "Enter your masked text here..."
	else:
	prompt_box = random.choice(PROMPT_LIST)
	text = st.text_area("Enter text", prompt_box)

	if st.button("Fill the mask"):
	with st.spinner(text="Getting results..."):
	st.subheader("Result")
	result = load_model(text, model_name)
	if "error" in result:
	if type(result["error"]) is str:
	st.write(f'{result["error"]}.', end=" ")
	if "estimated_time" in result:
	st.write(
	f'Please try again in about {result["estimated_time"]:.0f} seconds.'
	)
	else:
	if type(result["error"]) is list:
	for error in result["error"]:
	st.write(f"{error}")
	else:
	result_sequence, result_token = result[0]["sequence"], result[0]["token_str"]
	st.write(result_sequence)
	st.text("English translation")
	st.write(translate(result_sequence, "en", "es"))

	st.markdown(
	"""
	### Team members
	- Javier de la Rosa ([versae](https://huggingface.co/versae))
	- Eduardo González ([edugp](https://huggingface.co/edugp))
	- Paulo Villegas ([paulo](https://huggingface.co/paulo))
	- Pablo González de Prado ([Pablogps](https://huggingface.co/Pablogps))
	- Manu Romero ([mrm8488](https://huggingface.co/mrm8488))
	- María Grandury ([mariagrandury](https://huggingface.co/mariagrandury))

	### More information
	You can find more information about these models
	[here](https://huggingface.co/bertin-project/bertin-roberta-base-spanish).
	"""
	)