Rob Caamano committed on
Commit
528da04
1 Parent(s): fd5e1d8
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  from transformers import AutoTokenizer
3
  from transformers import (
4
  TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
@@ -7,20 +8,38 @@ from transformers import pipeline
7
 
8
  st.title("Detecting Toxic Tweets")
9
 
10
- demo = """I'm so proud of myself for accomplishing my goals today. #motivation #success"""
11
 
12
  text = st.text_area("Input text", demo, height=250)
13
 
14
- mod_name = "distilbert-base-uncased-finetuned-sst-2-english"
 
 
 
 
 
 
 
15
 
16
  tokenizer = AutoTokenizer.from_pretrained(mod_name)
17
  model = AutoModelForSequenceClassification.from_pretrained(mod_name)
18
  clf = pipeline(
19
- "sentiment-analysis", model=model, tokenizer=tokenizer, return_all_scores=True
20
  )
21
 
22
  input = tokenizer(text, return_tensors="tf")
23
 
24
  if st.button("Submit", type="primary"):
25
  results = clf(text)[0]
26
- st.write(f"This tweet is: {results}.")
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
  from transformers import AutoTokenizer
4
  from transformers import (
5
  TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
 
8
 
9
  st.title("Detecting Toxic Tweets")
10
 
11
+ demo = """Your words are like poison. They seep into my mind and make me feel worthless."""
12
 
13
  text = st.text_area("Input text", demo, height=250)
14
 
15
+ # Add a drop-down menu for model selection
16
+ model_options = {
17
+ "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
18
+ "Fine-tuned Toxicity Model": "https://huggingface.co/RobCaamano/toxicity_distilbert",
19
+ }
20
+ selected_model = st.selectbox("Select Model", options=list(model_options.keys()))
21
+
22
+ mod_name = model_options[selected_model]
23
 
24
  tokenizer = AutoTokenizer.from_pretrained(mod_name)
25
  model = AutoModelForSequenceClassification.from_pretrained(mod_name)
26
  clf = pipeline(
27
+ "text-classification", model=model, tokenizer=tokenizer, return_all_scores=True
28
  )
29
 
30
  input = tokenizer(text, return_tensors="tf")
31
 
32
  if st.button("Submit", type="primary"):
33
  results = clf(text)[0]
34
+ max_class = max(results, key=lambda x: x["score"])
35
+ tweet_portion = text[:50] + "..." if len(text) > 50 else text
36
+
37
+ # Create and display the table
38
+ df = pd.DataFrame(
39
+ {
40
+ "Tweet (portion)": [tweet_portion],
41
+ "Highest Toxicity Class": [max_class["label"]],
42
+ "Probability": [max_class["score"]],
43
+ }
44
+ )
45
+ st.table(df)