mlkorra
committed on
Commit
•
d7a6200
1
Parent(s):
5f2b2bc
Add app
Browse files- app.py +67 -0
- input/tweet_list.csv +0 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import random
|
4 |
+
import pandas as pd
|
5 |
+
import streamlit as st
|
6 |
+
from transformers import AutoModelForSequenceClassification,AutoTokenizer,pipeline
|
7 |
+
from transformers_interpret import SequenceClassificationExplainer
|
8 |
+
|
9 |
+
@st.cache(allow_output_mutation=True)
def _get_pipeline():
    """Build the sentiment-analysis pipeline for the OGBV gender-BERT model.

    Cached by Streamlit so the expensive tokenizer/model download and load
    happen only once per session, not on every script rerun.
    `allow_output_mutation=True` is required because model/pipeline objects
    are not hashable by st.cache.
    """
    checkpoint = 'mlkorra/OGBV-gender-bert-hi-en'
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)


def load_model(text):
    """Classify *text* with the OGBV gender-BERT model.

    Parameters
    ----------
    text : str
        Sentence (Hindi/English code-mixed tweet) to classify.

    Returns
    -------
    list[dict]
        Pipeline output, e.g. ``[{'label': ..., 'score': ...}]``.
    """
    # NOTE: the original version put @st.cache directly on this function,
    # which keyed the cache on `text` — so every *new* sentence re-loaded
    # the tokenizer and model from scratch. The model load is now cached
    # independently of the input in _get_pipeline().
    return _get_pipeline()(text)
28 |
+
|
29 |
+
def app():
    """Streamlit entry point: pick or type a tweet and classify it.

    Reads candidate tweets from ./input/tweet_list.csv (must contain a
    "text" column), lets the user select one (or a random one) from the
    sidebar, and shows the model prediction on demand.
    """
    st.title("OGBV-BERT")

    # Candidate tweets shipped with the app.
    target_text_path = "./input/tweet_list.csv"
    target_text_df = pd.read_csv(target_text_path)
    texts = target_text_df["text"]

    st.sidebar.title("Place")
    pick_random = st.sidebar.checkbox("Pick any random text")

    # `input_text` (formerly `masked_text`, a leftover from an earlier MLM
    # demo) is the sentence that will be classified.
    if pick_random:
        # random.randint is inclusive on both ends, hence len(texts) - 1.
        random_text = texts[random.randint(0, len(texts) - 1)]
        input_text = st.text_area("Please type a sentence to classify", random_text)
    else:
        select_text = st.sidebar.selectbox("Select any of the following text", texts)
        input_text = st.text_area("Please type a sentence to classify", select_text)

    if st.button("Classify"):
        with st.spinner("Classifying the sentence..."):
            pred = load_model(input_text)
            st.write(pred)
|
input/tweet_list.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
transformers==4.10.0
|
3 |
+
transformers_interpret
|