Commit e35b6a7 by hassiahk (parent: f5b5a13)

Added different pages for MLM and Classification

Files changed (5):
  1. app.py +9 -25
  2. apps/classifier.py +35 -0
  3. apps/mlm.py +47 -0
  4. config.json +5 -1
  5. multiapp.py +14 -0
app.py CHANGED
@@ -1,31 +1,15 @@
-import json
-
 import streamlit as st
-from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline
-
-
-with open("config.json") as f:
-    cfg = json.loads(f.read())
-
-
-@st.cache(allow_output_mutation=True)
-def load_model(input_text):
-    tokenizer = AutoTokenizer.from_pretrained(cfg["model_name_or_path"])
-    model = RobertaForSequenceClassification.from_pretrained(cfg["model_name_or_path"])
-
-    nlp = pipeline("text-classification", model=model, tokenizer=tokenizer)
-    result = nlp(input_text)
-    return result
-
-
-st.title("RoBERTa Marathi")
-
-input_text = st.text_input("Text:")
-
-predict_button = st.button("Predict")
-
-if predict_button:
-    with st.spinner("Generating prediction..."):
-        # Get prediction here
-        result = load_model(input_text)
-        st.write(result)
+
+from apps import classifier, mlm
+from multiapp import MultiApp
+
+
+def main():
+    app = MultiApp()
+    app.add_app("Fill Mask", mlm.app)
+    app.add_app("Text Classification", classifier.app)
+    app.run()
+
+
+if __name__ == "__main__":
+    main()
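The new `app.py` is now just a registry: it builds a `MultiApp`, registers the fill-mask and classification pages, and lets the sidebar switch between them. Assuming the repository's requirements are installed, it should still launch the usual way with `streamlit run app.py`.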
apps/classifier.py ADDED
@@ -0,0 +1,35 @@
+import json
+
+import streamlit as st
+from transformers import AutoTokenizer, RobertaForSequenceClassification, pipeline
+
+with open("config.json") as f:
+    cfg = json.loads(f.read())
+
+
+@st.cache(allow_output_mutation=True, show_spinner=False)
+def load_model(input_text, model_name_or_path):
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+    model = RobertaForSequenceClassification.from_pretrained(model_name_or_path)
+
+    nlp = pipeline("text-classification", model=model, tokenizer=tokenizer)
+    result = nlp(input_text)
+    return result
+
+
+def app():
+    st.title("RoBERTa Marathi")
+
+    classifier = st.sidebar.selectbox("Select a Model", index=0, options=["Indic NLP", "iNLTK"])
+
+    model_name_or_path = cfg["models"][classifier]
+    input_text = st.text_input("Text:")
+
+    predict_button = st.button("Predict")
+
+    if predict_button:
+        with st.spinner("Generating prediction..."):
+            # Get prediction here
+            result = load_model(input_text, model_name_or_path)
+
+        st.markdown("**Predicted label:** " + result[0]["label"])
apps/mlm.py ADDED
@@ -0,0 +1,47 @@
+import json
+
+import streamlit as st
+from transformers import AutoTokenizer, RobertaForMaskedLM, pipeline
+
+with open("config.json", encoding="utf8") as f:
+    cfg = json.loads(f.read())
+
+sample_texts = [
+    {
+        "original_text": "मोठी बातमी! उद्या दुपारी १ वाजता जाहीर होणार दहावीचा निकाल",
+        "masked_text": "मोठी बातमी! उद्या दुपारी <mask> वाजता जाहीर होणार दहावीचा निकाल",
+    },
+    {
+        "original_text": "अध्यक्ष शरद पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
+        "masked_text": "अध्यक्ष <mask> पवार आणि उपमुख्यमंत्री अजित पवार यांची भेट घेतली.",
+    },
+]
+
+
+@st.cache(allow_output_mutation=True, show_spinner=False)
+def load_model(input_text, model_name_or_path):
+    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+    model = RobertaForMaskedLM.from_pretrained(model_name_or_path)
+
+    nlp = pipeline("fill-mask", model=model, tokenizer=tokenizer)
+    result = nlp(input_text)
+    sentence, mask = result[0]["sequence"], result[0]["token_str"]
+    return sentence, mask
+
+
+def app():
+    st.title("RoBERTa Marathi")
+
+    masked_texts = [example["masked_text"] for example in sample_texts]
+    original_texts = [example["original_text"] for example in sample_texts]
+
+    input_text = st.sidebar.selectbox("Select a Text", options=masked_texts)
+    masked_text = st.text_area("Please type a masked sentence to fill", input_text)
+
+    fill_button = st.button("Fill the Mask!")
+
+    if fill_button:
+        with st.spinner("Filling the Mask..."):
+            filled_sentence, mask = load_model(masked_text, cfg["models"]["RoBERTa"])
+
+        st.markdown(f"**Filled sentence:** {filled_sentence}\n\n**Predicted masked token:** {mask}")
config.json CHANGED
@@ -1,3 +1,7 @@
 {
-    "model_name_or_path": "flax-community/mr-indicnlp-classifier"
+    "models": {
+        "Indic NLP": "flax-community/mr-indicnlp-classifier",
+        "iNLTK": "flax-community/mr-inltk-classifier",
+        "RoBERTa": "flax-community/roberta-base-mr"
+    }
 }
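The single `model_name_or_path` entry becomes a `models` mapping so each page can pick its own checkpoint. A minimal sketch of the lookup the pages perform:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

# The classifier page indexes by the sidebar selection ("Indic NLP" or "iNLTK");
# the fill-mask page always uses the "RoBERTa" entry.
print(cfg["models"]["Indic NLP"])  # flax-community/mr-indicnlp-classifier
print(cfg["models"]["RoBERTa"])    # flax-community/roberta-base-mr
```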
multiapp.py ADDED
@@ -0,0 +1,14 @@
+import streamlit as st
+
+
+class MultiApp:
+    def __init__(self):
+        self.apps = []
+
+    def add_app(self, title, func):
+        self.apps.append({"title": title, "function": func})
+
+    def run(self):
+        st.sidebar.header("Tasks")
+        app = st.sidebar.radio("", self.apps, format_func=lambda app: app["title"])
+        app["function"]()