Spaces:
Runtime error
Runtime error
from pandas.io.formats.format import return_docstring | |
import streamlit as st | |
import pandas as pd | |
from transformers import AutoTokenizer, AutoModelForMaskedLM | |
from transformers import pipeline | |
import os | |
import json | |
def load_model(masked_text, model_name):
    """Run a fill-mask prediction for ``masked_text`` with ``model_name``.

    Args:
        masked_text: Sentence containing the generic ``<mask>`` placeholder.
        model_name: Hugging Face model identifier to load.

    Returns:
        The output of the ``fill-mask`` pipeline for the sentence, returned
        unchanged.
    """
    # Only this checkpoint is loaded with ``from_flax=True``; every other
    # model is loaded with the flag off.
    from_flax = model_name == "flax-community/roberta-hindi"
    model = AutoModelForMaskedLM.from_pretrained(model_name, from_flax=from_flax)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Swap the generic "<mask>" placeholder for this tokenizer's own mask
    # token. The original spelled the method "replcae", which raised
    # AttributeError on every call.
    masked_text = masked_text.replace("<mask>", tokenizer.mask_token)

    nlp = pipeline("fill-mask", model=model, tokenizer=tokenizer)
    return nlp(masked_text)
def app():
    """Render the masked-language-modelling demo page.

    Shows a title, lets the user pick a sentence from the targeted-text CSV
    and one or more models, and, once the button is pressed, displays a table
    with each model's first prediction (predicted word and filled sentence).
    """
    st.markdown(
        "<h1 style='text-align: center; color: green;'>RoBERTa Hindi</h1>",
        unsafe_allow_html=True,
    )
    st.markdown(
        "This demo uses pretrained RoBERTa variants for Mask Language Modelling (MLM)"
    )

    # Candidate sentences are read from the "text" column of the CSV file.
    texts = pd.read_csv("./mlm_custom/mlm_targeted_text.csv")["text"]
    st.markdown("""## Select any of the following text : """)
    masked_text = st.selectbox("", texts)
    st.write("You selected:", masked_text)

    available_models = [
        "flax-community/roberta-hindi",
        "mrm8488/HindiBERTa",
        "ai4bharat/indic-bert",
        "neuralspace-reverie/indic-transformers-hi-bert",
        "surajp/RoBERTa-hindi-guj-san",
    ]
    selected_models = st.multiselect(
        "Choose models",
        available_models,
        ["flax-community/roberta-hindi"],
    )

    # Nothing more to do until the user asks for predictions.
    if not st.button("Fill the Mask!"):
        return

    with st.spinner("Filling the Mask..."):
        # Keep only the first entry of each model's pipeline output.
        top_predictions = [
            (model_name, load_model(masked_text, model_name)[0])
            for model_name in selected_models
        ]
        results_df = pd.DataFrame(
            {
                "Model Name": [name for name, _ in top_predictions],
                "Predicted Word": [best["token_str"] for _, best in top_predictions],
                "Sentence": [best["sequence"] for _, best in top_predictions],
            }
        )
        st.dataframe(results_df)