Spaces:
Runtime error
Runtime error
import functools
import json
import os
import random

import pandas as pd
import streamlit as st
# NOTE(review): `return_docstring` is not used anywhere in the visible code and
# comes from a pandas-internal module path; confirm it is needed, since an
# import failure here would stop the whole page from loading.
from pandas.io.formats.format import return_docstring
from transformers import AutoTokenizer, AutoModelForMaskedLM
from transformers import pipeline
# Load the app configuration once at import time. `cfg["models"]` is read
# elsewhere in this file as a mapping of display name -> model identifier.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default locale encoding.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)
@functools.lru_cache(maxsize=None)
def _fill_mask_pipeline(model_name):
    """Build and memoize the fill-mask pipeline for ``model_name``.

    Instantiating the model and tokenizer is by far the most expensive step,
    so it is done once per distinct ``model_name`` and reused on later calls.
    The cache lives for as long as this module stays imported.

    NOTE(review): every distinct model stays resident in memory; switch to a
    bounded ``maxsize`` if the configured model list is large.
    """
    model = AutoModelForMaskedLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


def load_model(masked_text, model_name):
    """Fill the mask in ``masked_text`` using the model ``model_name``.

    Args:
        masked_text: Sentence containing the generic ``<mask>`` placeholder.
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and its tokenizer.

    Returns:
        A ``(sequence, token_str)`` tuple taken from the first prediction the
        pipeline returns: the completed sentence and the token that was
        filled in.

    NOTE(review): indexing ``[0]["sequence"]`` assumes the pipeline returns a
    flat list of predictions, i.e. the text contains exactly one mask —
    confirm how multi-mask input should be handled.
    """
    nlp = _fill_mask_pipeline(model_name)
    # Each tokenizer has its own mask token; translate the generic placeholder
    # into the one this model expects.
    masked_text = masked_text.replace("<mask>", nlp.tokenizer.mask_token)
    predictions = nlp(masked_text)
    best = predictions[0]
    return best["sequence"], best["token_str"]
def app():
    """Render the Hindi masked-language-modelling demo page.

    Lets the user choose any of the models listed in ``cfg["models"]``, pick
    a sentence (randomly, or from a sidebar list read from the targeted-text
    CSV), edit it, and then shows one table row per selected model with the
    token that model filled in and the completed sentence.
    """
    st.markdown(
        "<h1 style='text-align: center; color: green;'>RoBERTa Hindi</h1>",
        unsafe_allow_html=True,
    )
    st.markdown(
        "This demo uses multiple hindi transformer models for Masked Language Modelling (MLM)."
    )

    # All configured models are pre-selected.
    available_models = list(cfg["models"])
    models = st.multiselect("Choose models", available_models, available_models)

    texts = pd.read_csv("./mlm_custom/mlm_targeted_text.csv")["text"]

    st.sidebar.title("Hindi MLM")
    pick_random = st.sidebar.checkbox("Pick any random text")

    # Seed the editable text area either with a random sample sentence or
    # with the one chosen in the sidebar.
    if pick_random:
        default_text = texts[random.randint(0, len(texts) - 1)]
    else:
        default_text = st.sidebar.selectbox("Select any of the following text", texts)
    masked_text = st.text_area("Please type a masked sentence to fill", default_text)

    if st.button("Fill the Mask!"):
        with st.spinner("Filling the Mask..."):
            rows = []
            for selected_model in models:
                filled_sentence, filled_token = load_model(
                    masked_text, cfg["models"][selected_model]
                )
                rows.append((selected_model, filled_token, filled_sentence))
            results_df = pd.DataFrame(
                rows, columns=["Model Name", "Filled Token", "Filled Text"]
            )
            st.table(results_df)