Spaces:
Runtime error
Runtime error
File size: 2,804 Bytes
d58e1c6 33ae6dd 666b7aa 3f6b043 d58e1c6 666b7aa d58e1c6 666b7aa d58e1c6 666b7aa 33ae6dd 666b7aa d58e1c6 666b7aa d58e1c6 90dc487 d58e1c6 666b7aa d58e1c6 666b7aa d58e1c6 666b7aa d58e1c6 666b7aa d58e1c6 666b7aa 33ae6dd 666b7aa 33ae6dd 3f6b043 666b7aa 33ae6dd 666b7aa 33ae6dd 666b7aa 33ae6dd decd5de 666b7aa 33ae6dd 3f6b043 33ae6dd decd5de 666b7aa d58e1c6 666b7aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import json
import random
import pandas as pd
import streamlit as st
from transformers import AutoModelForMaskedLM, AutoTokenizer, pipeline
with open("config.json") as f:
cfg = json.loads(f.read())
@st.cache(show_spinner=False, persist=True)
def load_model(masked_text, model_name):
model = AutoModelForMaskedLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
nlp = pipeline("fill-mask", model=model, tokenizer=tokenizer)
MASK_TOKEN = tokenizer.mask_token
masked_text = masked_text.replace("<mask>", MASK_TOKEN)
result_sentence = nlp(masked_text)
return result_sentence[0]["sequence"], result_sentence[0]["token_str"]
def main():
st.title("RoBERTa Hindi")
st.markdown(
"This demo uses the below pretrained BERT variants for Mask Language Modeling (MLM):\n"
"- [RoBERTa Hindi](https://huggingface.co/flax-community/roberta-hindi)\n"
"- [Indic Transformers Hindi](https://huggingface.co/neuralspace-reverie/indic-transformers-hi-bert)\n"
"- [HindiBERTa](https://huggingface.co/mrm8488/HindiBERTa)\n"
"- [RoBERTa Hindi Guj San](https://huggingface.co/surajp/RoBERTa-hindi-guj-san)"
)
models_list = list(cfg["models"].keys())
models = st.multiselect(
"Choose models",
models_list,
models_list[0],
)
target_text_path = "./mlm_custom/mlm_targeted_text.csv"
target_text_df = pd.read_csv(target_text_path)
texts = target_text_df["text"]
st.sidebar.title("Hindi MLM")
pick_random = st.sidebar.checkbox("Pick any random text")
results_df = pd.DataFrame(columns=["Model Name", "Filled Token", "Filled Text"])
model_names = []
filled_masked_texts = []
filled_tokens = []
if pick_random:
random_text = texts[random.randint(0, texts.shape[0] - 1)]
masked_text = st.text_area("Please type a masked sentence to fill", random_text)
else:
select_text = st.sidebar.selectbox("Select any of the following text", texts)
masked_text = st.text_area("Please type a masked sentence to fill", select_text)
# pd.set_option('max_colwidth',30)
if st.button("Fill the Mask!"):
with st.spinner("Filling the Mask..."):
for selected_model in models:
filled_sentence, filled_token = load_model(masked_text, cfg["models"][selected_model])
model_names.append(selected_model)
filled_tokens.append(filled_token)
filled_masked_texts.append(filled_sentence)
results_df["Model Name"] = model_names
results_df["Filled Token"] = filled_tokens
results_df["Filled Text"] = filled_masked_texts
st.table(results_df)
if __name__ == "__main__":
main()
|