File size: 2,652 Bytes
d58e1c6
 
 
 
 
 
 
33ae6dd
d58e1c6
 
 
 
 
 
a8ac4ed
33ae6dd
 
 
 
d58e1c6
 
1664596
d58e1c6
 
 
90dc487
d58e1c6
90dc487
d58e1c6
 
 
33ae6dd
046776b
d58e1c6
046776b
 
d58e1c6
 
 
 
 
 
 
 
33ae6dd
 
 
 
d58e1c6
decd5de
33ae6dd
 
 
 
 
 
 
 
 
 
 
 
d58e1c6
33ae6dd
 
 
 
decd5de
 
33ae6dd
 
 
decd5de
33ae6dd
 
 
d58e1c6
33ae6dd
 
d58e1c6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from pandas.io.formats.format import return_docstring
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer,AutoModelForMaskedLM
from transformers import pipeline
import os
import json
import random

@st.cache(show_spinner=False, allow_output_mutation=True)
def _load_fill_mask_pipeline(model_name):
    """Build the fill-mask pipeline for ``model_name``, cached per model name.

    The cache key is the model name only, so the weights and tokenizer are
    loaded once and then reused for every sentence the user submits.
    ``allow_output_mutation=True`` stops Streamlit from trying to hash the
    returned pipeline object on each cache hit.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``fill-mask`` pipeline wrapping the loaded model and tokenizer.
    """
    model = AutoModelForMaskedLM.from_pretrained(model_name, from_flax=True)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return pipeline('fill-mask', model=model, tokenizer=tokenizer)


@st.cache(show_spinner=False,persist=True)
def load_model(masked_text,model_name):
    """Fill the mask in ``masked_text`` using the model ``model_name``.

    Despite its name, this function runs inference as well as loading the
    model. The result is cached per ``(masked_text, model_name)`` pair; the
    model itself is cached separately by ``_load_fill_mask_pipeline`` so a new
    sentence no longer triggers a full reload of the model.

    Args:
        masked_text: Sentence containing the generic ``<mask>`` placeholder.
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        The ``'sequence'`` string of the first candidate returned by the
        pipeline.
    """
    nlp = _load_fill_mask_pipeline(model_name)

    # Models differ in their mask token, so translate the generic
    # placeholder into whatever this tokenizer expects.
    MASK_TOKEN = nlp.tokenizer.mask_token
    masked_text = masked_text.replace("<mask>", MASK_TOKEN)

    # NOTE(review): indexing assumes the pipeline returns a flat list of
    # candidate dicts, i.e. the text contains a single mask - confirm the
    # behaviour for inputs with several masks.
    result_sentence = nlp(masked_text)

    return result_sentence[0]['sequence']

def main():
    """Render the Hindi masked-language-model demo page.

    Lets the user choose one or more models and a masked sentence (taken from
    the bundled CSV, either picked at random or selected in the sidebar, and
    editable in the text area). When the button is pressed, every selected
    model fills the mask and the results are shown in a table.
    """
    st.title("RoBERTa Hindi")
    st.markdown(
        "This demo uses pretrained RoBERTa variants for Mask Language Modeling (MLM)"
    )

    available_models = [
        'flax-community/roberta-hindi',
        'mrm8488/HindiBERTa',
        'ai4bharat/indic-bert',
        'neuralspace-reverie/indic-transformers-hi-bert',
        'surajp/RoBERTa-hindi-guj-san',
    ]
    models = st.multiselect(
        "Choose models",
        available_models,
        ["flax-community/roberta-hindi"],
    )

    # Example sentences offered to the user come from the 'text' column.
    texts = pd.read_csv('./mlm_custom/mlm_targeted_text.csv')['text']

    st.sidebar.title("Hindi MLM")
    pick_random = st.sidebar.checkbox("Pick any random text")

    # Decide what pre-populates the text area: a random row or the sidebar
    # selection. The user may still edit the text before submitting.
    if pick_random:
        last_position = texts.shape[0] - 1
        default_text = texts[random.randint(0, last_position)]
    else:
        default_text = st.sidebar.selectbox('Select any of the following text', texts)
    masked_text = st.text_area("Please type a masked sentence to fill", default_text)

    # Nothing more to render until the user asks for predictions.
    if not st.button('Fill the Mask!'):
        return

    with st.spinner("Filling the Mask..."):
        completions = [load_model(masked_text, name) for name in models]

        # One row per selected model, all sharing the same input sentence.
        results_df = pd.DataFrame(columns=['Model Name', 'Masked Text', 'Filled Masked Text'])
        results_df['Model Name'] = list(models)
        results_df['Masked Text'] = [masked_text] * len(models)
        results_df['Filled Masked Text'] = completions

        st.table(results_df)
            
# Build the page only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()