mlkorra committed on
Commit
d7a6200
1 Parent(s): 5f2b2bc
Files changed (3) hide show
  1. app.py +67 -0
  2. input/tweet_list.csv +0 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import random
4
+ import pandas as pd
5
+ import streamlit as st
6
+ from transformers import AutoModelForSequenceClassification,AutoTokenizer,pipeline
7
+ from transformers_interpret import SequenceClassificationExplainer
8
+
9
@st.cache(allow_output_mutation=True)
def _load_pipeline():
    """Build the classification pipeline once and reuse it across reruns.

    The function takes no arguments, so Streamlit's cache holds a single
    entry: the tokenizer and model are loaded from the checkpoint only on
    the first call. ``allow_output_mutation=True`` tells ``st.cache`` not
    to hash the returned pipeline object on every access.
    """
    checkpoint = 'mlkorra/OGBV-gender-bert-hi-en'

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    return pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)


def load_model(text):
    """Classify ``text`` with the OGBV-gender-bert-hi-en checkpoint.

    Previously this function was itself decorated with ``st.cache`` and
    keyed on ``text``, which meant the tokenizer and model were rebuilt
    for every input that had not been seen before. Loading now lives in
    the cached ``_load_pipeline`` helper; only inference runs per call.

    Args:
        text: The sentence (or list of sentences) handed to the pipeline.

    Returns:
        Whatever the pipeline returns for ``text`` (per the transformers
        documentation, a list of dicts with ``label`` and ``score``).
    """
    nlp = _load_pipeline()
    return nlp(text)
28
+
29
def app():
    """Render the OGBV-BERT page: pick or type a sentence, then classify it.

    Reads candidate sentences from the ``text`` column of
    ``./input/tweet_list.csv``, lets the user either take a random one or
    choose one from the sidebar, shows it in an editable text area, and
    writes the output of ``load_model`` when "Classify" is pressed.
    """
    st.title("OGBV-BERT")

    target_text_path = "./input/tweet_list.csv"
    target_text_df = pd.read_csv(target_text_path)
    texts = target_text_df["text"]

    st.sidebar.title("Place")
    pick_random = st.sidebar.checkbox("Pick any random text")

    if pick_random:
        # Positional lookup: label-based ``texts[i]`` only works while the
        # Series keeps its default 0..n-1 index. An empty CSV falls back
        # to an empty default instead of raising from the random draw.
        # NOTE(review): Streamlit re-runs the script on each interaction,
        # so a fresh sentence is drawn every rerun; persisting the pick
        # (e.g. in st.session_state) may be desirable -- confirm the
        # Streamlit version in use supports it.
        if len(texts):
            default_text = texts.iloc[random.randrange(len(texts))]
        else:
            default_text = ""
    else:
        default_text = st.sidebar.selectbox(
            "Select any of the following text", texts
        )

    input_text = st.text_area("Please type a sentence to classify", default_text)

    if st.button("Classify"):
        # Do not send blank input to the model.
        if not input_text or not input_text.strip():
            st.warning("Please enter some text to classify.")
        else:
            with st.spinner("Classifying the sentence..."):
                pred = load_model(input_text)
            st.write(pred)
input/tweet_list.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ transformers==4.10.0
3
+ transformers_interpret