mlkorra committed on
Commit
33ae6dd
·
1 Parent(s): decd5de

update app

Browse files
Files changed (2) hide show
  1. app.py +34 -16
  2. mlm_custom/mlm_test_config.csv +1 -1
app.py CHANGED
@@ -5,16 +5,18 @@ from transformers import AutoTokenizer,AutoModelForMaskedLM
5
  from transformers import pipeline
6
  import os
7
  import json
 
8
 
9
  @st.cache(show_spinner=False,persist=True)
10
  def load_model(masked_text,model_name):
11
 
12
  model = AutoModelForMaskedLM.from_pretrained(model_name, from_flax=True)
13
  tokenizer = AutoTokenizer.from_pretrained(model_name)
14
- # tokenizer.save_pretrained('exported_pytorch_model')
15
- # model.save_pretrained('exported_pytorch_model')
16
  nlp = pipeline('fill-mask', model=model, tokenizer=tokenizer)
17
-
 
 
 
18
  result_sentence = nlp(masked_text)
19
 
20
  return result_sentence[0]['sequence']
@@ -27,7 +29,7 @@ def main():
27
  )
28
 
29
  models = st.multiselect(
30
- "Choose models",
31
  ['flax-community/roberta-hindi','mrm8488/HindiBERTa','ai4bharat/indic-bert',\
32
  'neuralspace-reverie/indic-transformers-hi-bert',
33
  'surajp/RoBERTa-hindi-guj-san'],
@@ -40,24 +42,40 @@ def main():
40
  texts = target_text_df['text']
41
 
42
  st.sidebar.title("Hindi MLM")
43
- masked_text = st.sidebar.selectbox('Select any of the following text',
44
- texts)
45
-
46
- st.write('You selected:', masked_text)
47
 
48
  results_df = pd.DataFrame(columns = ['Model Name','Masked Text','Filled Masked Text'])
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- for selected_model in models:
 
 
 
51
 
52
- if st.button('Fill the Mask!'):
53
- with st.spinner("Filling the Mask..."):
54
  filled_sentence = load_model(masked_text,selected_model)
55
- results_df['Model Name'] = selected_model
56
- results_df['Masked Text'] = masked_text
57
- results_df['Filled Masked Text'] = filled_sentence
58
 
59
- st.table(results_df)
 
 
60
 
61
-
 
62
  if __name__ == "__main__":
63
  main()
 
5
  from transformers import pipeline
6
  import os
7
  import json
8
+ import random
9
 
10
  @st.cache(show_spinner=False,persist=True)
11
  def load_model(masked_text,model_name):
12
 
13
  model = AutoModelForMaskedLM.from_pretrained(model_name, from_flax=True)
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
15
  nlp = pipeline('fill-mask', model=model, tokenizer=tokenizer)
16
+
17
+ MASK_TOKEN = tokenizer.mask_token
18
+
19
+ masked_text = masked_text.replace("<mask>",MASK_TOKEN)
20
  result_sentence = nlp(masked_text)
21
 
22
  return result_sentence[0]['sequence']
 
29
  )
30
 
31
  models = st.multiselect(
32
+ "Choose models",
33
  ['flax-community/roberta-hindi','mrm8488/HindiBERTa','ai4bharat/indic-bert',\
34
  'neuralspace-reverie/indic-transformers-hi-bert',
35
  'surajp/RoBERTa-hindi-guj-san'],
 
42
  texts = target_text_df['text']
43
 
44
  st.sidebar.title("Hindi MLM")
45
+
46
+ pick_random = st.sidebar.checkbox("Pick any random text")
47
+
48
+ #st.write('You selected:', masked_text)
49
 
50
  results_df = pd.DataFrame(columns = ['Model Name','Masked Text','Filled Masked Text'])
51
+
52
+ model_names = []
53
+ masked_texts = []
54
+ filled_masked_texts = []
55
+
56
+ if pick_random:
57
+ random_text = texts[random.randint(0,texts.shape[0]-1)]
58
+ masked_text = st.text_area("Please type a masked sentence to fill",random_text)
59
+ else:
60
+ select_text = st.sidebar.selectbox('Select any of the following text',\
61
+ texts)
62
+ masked_text = st.text_area("Please type a masked sentence to fill",select_text)
63
 
64
+ if st.button('Fill the Mask!'):
65
+ with st.spinner("Filling the Mask..."):
66
+
67
+ for selected_model in models:
68
 
 
 
69
  filled_sentence = load_model(masked_text,selected_model)
70
+ model_names.append(selected_model)
71
+ masked_texts.append(masked_text)
72
+ filled_masked_texts.append(filled_sentence)
73
 
74
+ results_df['Model Name'] = model_names
75
+ results_df['Masked Text'] = masked_texts
76
+ results_df['Filled Masked Text'] = filled_masked_texts
77
 
78
+ st.table(results_df)
79
+
80
  if __name__ == "__main__":
81
  main()
mlm_custom/mlm_test_config.csv CHANGED
@@ -3,4 +3,4 @@ flax-community/roberta-hindi,,,TRUE,TRUE,TRUE
3
  mrm8488/HindiBERTa,,,FALSE,TRUE,TRUE
4
  ai4bharat/indic-bert,,,FALSE,FALSE,FALSE
5
  neuralspace-reverie/indic-transformers-hi-bert,,,FALSE,TRUE,TRUE
6
- surajp/RoBERTa-hindi-guj-san,,,FALSE,TRUE,TRUE
 
3
  mrm8488/HindiBERTa,,,FALSE,TRUE,TRUE
4
  ai4bharat/indic-bert,,,FALSE,FALSE,FALSE
5
  neuralspace-reverie/indic-transformers-hi-bert,,,FALSE,TRUE,TRUE
6
+ surajp/RoBERTa-hindi-guj-san,,,FALSE,TRUE,TRUE