Rob Caamano committed
Commit 528da04
1 Parent(s): fd5e1d8
App 2.0
app.py CHANGED
@@ -1,4 +1,5 @@
 import streamlit as st
+import pandas as pd
 from transformers import AutoTokenizer
 from transformers import (
     TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
@@ -7,20 +8,38 @@ from transformers import pipeline
 
 st.title("Detecting Toxic Tweets")
 
-demo = """
+demo = """Your words are like poison. They seep into my mind and make me feel worthless."""
 
 text = st.text_area("Input text", demo, height=250)
 
-
+# Add a drop-down menu for model selection
+model_options = {
+    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
+    "Fine-tuned Toxicity Model": "https://huggingface.co/RobCaamano/toxicity_distilbert",
+}
+selected_model = st.selectbox("Select Model", options=list(model_options.keys()))
+
+mod_name = model_options[selected_model]
 
 tokenizer = AutoTokenizer.from_pretrained(mod_name)
 model = AutoModelForSequenceClassification.from_pretrained(mod_name)
 clf = pipeline(
-    "
+    "text-classification", model=model, tokenizer=tokenizer, return_all_scores=True
 )
 
 input = tokenizer(text, return_tensors="tf")
 
 if st.button("Submit", type="primary"):
     results = clf(text)[0]
-
+    max_class = max(results, key=lambda x: x["score"])
+    tweet_portion = text[:50] + "..." if len(text) > 50 else text
+
+    # Create and display the table
+    df = pd.DataFrame(
+        {
+            "Tweet (portion)": [tweet_portion],
+            "Highest Toxicity Class": [max_class["label"]],
+            "Probability": [max_class["score"]],
+        }
+    )
+    st.table(df)