# Source: roberta-hindi / apps/inference.py (2.6 kB)
# Last commit by mlkorra: "Add all models as default" (7bfa78c)
from pandas.io.formats.format import return_docstring
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, AutoModelForMaskedLM
from transformers import pipeline
import os
import json
import random
# Load the app configuration once at import time. cfg["models"] maps the
# names offered in the UI to the identifiers handed to `from_pretrained`.
with open("config.json") as f:
    cfg = json.load(f)
@st.cache(allow_output_mutation=True, show_spinner=False)
def _load_pipeline(model_name):
    """Build the fill-mask pipeline for ``model_name``, cached per model.

    The cache key is only the model name, so the weights and tokenizer are
    instantiated once per model instead of once per (sentence, model) pair.
    ``allow_output_mutation=True`` stops Streamlit from hashing the returned
    pipeline object on every cache hit.
    """
    model = AutoModelForMaskedLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline("fill-mask", model=model, tokenizer=tokenizer)


@st.cache(show_spinner=False, persist=True)
def load_model(masked_text, model_name):
    """Fill the mask in ``masked_text`` using the model ``model_name``.

    Args:
        masked_text: Sentence containing the generic ``<mask>`` placeholder;
            it is rewritten to the tokenizer's own mask token before
            inference.
        model_name: Identifier passed to ``from_pretrained`` for both the
            model and the tokenizer.

    Returns:
        A ``(filled_sentence, filled_token)`` tuple taken from the first
        prediction returned by the pipeline.
    """
    # Reuse the per-model pipeline; only the prediction below depends on
    # the sentence, so a new sentence no longer reloads the model.
    nlp = _load_pipeline(model_name)

    # Different tokenizers spell the mask differently, so translate the
    # generic placeholder into the one this model expects.
    mask_token = nlp.tokenizer.mask_token
    masked_text = masked_text.replace("<mask>", mask_token)

    best = nlp(masked_text)[0]
    return best["sequence"], best["token_str"]
def app():
    """Render the Hindi masked-language-modelling demo page.

    Lets the user choose models from ``cfg["models"]``, pick or type a
    masked sentence, and shows each model's first prediction in a table
    when the "Fill the Mask!" button is pressed.
    """
    st.markdown(
        "<h1 style='text-align: center; color: green;'>RoBERTa Hindi</h1>",
        unsafe_allow_html=True,
    )
    st.markdown(
        "This demo uses multiple hindi transformer models for Masked Language Modelling (MLM)."
    )

    # Every configured model is pre-selected by default.
    available_models = list(cfg["models"])
    models = st.multiselect("Choose models", available_models, available_models)

    # Candidate sentences come from the "text" column of the bundled CSV.
    texts = pd.read_csv("./mlm_custom/mlm_targeted_text.csv")["text"]

    st.sidebar.title("Hindi MLM")
    pick_random = st.sidebar.checkbox("Pick any random text")

    # The text area is seeded either with a random sentence or with the
    # sentence chosen in the sidebar.
    if pick_random:
        default_text = texts[random.randint(0, len(texts) - 1)]
    else:
        default_text = st.sidebar.selectbox("Select any of the following text", texts)
    masked_text = st.text_area("Please type a masked sentence to fill", default_text)

    if not st.button("Fill the Mask!"):
        return

    with st.spinner("Filling the Mask..."):
        # One (filled_sentence, filled_token) pair per selected model,
        # in selection order.
        predictions = [
            load_model(masked_text, cfg["models"][selected_model])
            for selected_model in models
        ]

        results_df = pd.DataFrame(columns=["Model Name", "Filled Token", "Filled Text"])
        results_df["Model Name"] = list(models)
        results_df["Filled Token"] = [token for _, token in predictions]
        results_df["Filled Text"] = [sentence for sentence, _ in predictions]

        st.table(results_df)