Oliver Li committed
Commit 809559e
1 Parent(s): 46d426f

milestone3

Files changed (2):
  1. app.py +49 -18
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,23 +1,29 @@
 import streamlit as st
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+import pandas as pd
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
 # Function to load the pre-trained model
 def load_model(model_name):
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSequenceClassification.from_pretrained(model_name)
-    sentiment_pipeline = pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)
-    return sentiment_pipeline
+    return tokenizer, model
 
 # Streamlit app
-st.title("Basic Sentiment Analysis App")
-st.write("Enter a text and select a pre-trained model to get the sentiment analysis.")
+st.title("Multi-label Toxicity Detection App")
+st.write("Enter a text and select the fine-tuned model to get the toxicity analysis.")
 
 # Input text
-default_text = "I love my life."
+default_text = "I will kill you if you do not give me my pop tarts."
 text = st.text_input("Enter your text:", value=default_text)
 
+# Map the fine-tuned model's generic labels to human-readable category names
+category = {'LABEL_0': 'toxic', 'LABEL_1': 'severe_toxic', 'LABEL_2': 'obscene', 'LABEL_3': 'threat', 'LABEL_4': 'insult', 'LABEL_5': 'identity_hate'}
+
 # Model selection
 model_options = {
+    "Olivernyu/finetuned_bert_base_uncased": {
+        "description": "This model detects different types of toxicity, such as threats, obscenity, insults, and identity-based hate, in text.",
+    },
     "distilbert-base-uncased-finetuned-sst-2-english": {
         "labels": ["NEGATIVE", "POSITIVE"],
         "description": "This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
@@ -31,24 +37,49 @@ model_options = {
         "description": "This model classifies tweets into negative (LABEL_0), neutral (LABEL_1), or positive (LABEL_2) sentiment. It is based on RoBERTa and fine-tuned on a large dataset of tweets.",
     },
 }
-selected_model = st.selectbox("Choose a pre-trained model:", model_options)
+selected_model = st.selectbox("Choose a fine-tuned model:", model_options)
 
 st.write("### Model Information")
-st.write(f"**Labels:** {', '.join(model_options[selected_model]['labels'])}")
 st.write(f"**Description:** {model_options[selected_model]['description']}")
 
-# Load the model and perform sentiment analysis
+# Load the model and perform toxicity analysis
 if st.button("Analyze"):
     if not text:
         st.write("Please enter a text.")
     else:
-        with st.spinner("Analyzing sentiment..."):
-            sentiment_pipeline = load_model(selected_model)
-            result = sentiment_pipeline(text)
-            st.write(f"Sentiment: {result[0]['label']} (confidence: {result[0]['score']:.2f})")
-            if result[0]['label'] in ['POSITIVE', 'LABEL_1'] and result[0]['score'] > 0.9:
-                st.balloons()
-            elif result[0]['label'] in ['NEGATIVE', 'LABEL_0'] and result[0]['score'] > 0.9:
-                st.error("Hater detected.")
+        with st.spinner("Analyzing toxicity..."):
+            if selected_model == "Olivernyu/finetuned_bert_base_uncased":
+                tokenizer, model = load_model(selected_model)
+                toxicity_detector = pipeline("text-classification", tokenizer=tokenizer, model=model)
+                # top_k=2 returns the two highest-scoring labels as a score-sorted list of {'label', 'score'} dicts
+                outputs = toxicity_detector(text, top_k=2)
+
+                scores = [output["score"] for output in outputs]
+                # Get the highest toxicity category and its probability
+                max_score_index = scores.index(max(scores))
+                highest_toxicity_class = category[outputs[max_score_index]["label"]]
+                highest_probability = scores[max_score_index]
+
+                results = []
+                for item in outputs:
+                    results.append((category[item['label']], item['score']))
+
+                # Create a table with the input text (or a portion of it) and the top two toxicity categories with their scores
+                table_data = {
+                    "Text (portion)": [text[:50]],
+                    f"{results[0][0]}": [results[0][1]],
+                    f"{results[1][0]}": [results[1][1]],
+                }
+                table_df = pd.DataFrame(table_data)
+                st.table(table_df)
+            else:
+                tokenizer, model = load_model(selected_model)
+                sentiment_pipeline = pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)
+                result = sentiment_pipeline(text)
+                st.write(f"Sentiment: {result[0]['label']} (confidence: {result[0]['score']:.2f})")
+                if result[0]['label'] in ['POSITIVE', 'LABEL_1'] and result[0]['score'] > 0.9:
+                    st.balloons()
+                elif result[0]['label'] in ['NEGATIVE', 'LABEL_0'] and result[0]['score'] > 0.9:
+                    st.error("Hater detected.")
 else:
-    st.write("Enter a text and click 'Analyze' to perform sentiment analysis.")
+    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")
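
For reference, a minimal standalone sketch of the pipeline call the new branch relies on. It assumes a recent transformers release, where calling a text-classification pipeline on a single string with top_k=2 returns a flat list of {'label', 'score'} dicts sorted by descending score; the model name, label mapping, and example text are taken from the diff above, and the scores shown in the comment are illustrative, not real outputs.

    import pandas as pd
    from transformers import pipeline

    # Load the fine-tuned toxicity model by name; the tokenizer is resolved automatically
    toxicity_detector = pipeline("text-classification", model="Olivernyu/finetuned_bert_base_uncased")
    outputs = toxicity_detector("I will kill you if you do not give me my pop tarts.", top_k=2)
    # outputs resembles: [{'label': 'LABEL_3', 'score': 0.93}, {'label': 'LABEL_0', 'score': 0.88}]
    # (whether scores come from a sigmoid or a softmax depends on the model's config)

    category = {'LABEL_0': 'toxic', 'LABEL_1': 'severe_toxic', 'LABEL_2': 'obscene',
                'LABEL_3': 'threat', 'LABEL_4': 'insult', 'LABEL_5': 'identity_hate'}
    results = [(category[item['label']], item['score']) for item in outputs]

    # One-row table, mirroring what app.py renders with st.table
    print(pd.DataFrame({name: [score] for name, score in results}))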
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 streamlit
 torch
 transformers
+pandas
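
With pandas added to the dependencies, a fresh environment should follow the usual Streamlit workflow: pip install -r requirements.txt, then streamlit run app.py.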