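"""Toxic Tweets Analyzer.

A Streamlit app that scores a tweet (or any text) for toxicity. It offers a
finetuned DistilBERT cyberbullying model plus several stock sentiment-analysis
models from the Hugging Face Hub.
"""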
import streamlit as st
import torch
import pandas as pd
import requests
from random import randint
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DistilBertTokenizerFast,
    pipeline,
)

import comments  # local module; expected to expose a list of sample tweets named `comments`


def predict_cyberbullying_probability(sentence, tokenizer, model):
    """Return one sigmoid probability per toxicity label for a single sentence."""
    # Preprocess the input sentence
    inputs = tokenizer(sentence, padding='max_length', return_token_type_ids=False,
                       return_attention_mask=True, truncation=True, max_length=512,
                       return_tensors='pt')

    with torch.no_grad():
        # Forward pass
        outputs = model(inputs['input_ids'], attention_mask=inputs['attention_mask'])

    # Sigmoid rather than softmax: the labels are independent in a multi-label
    # setup, so each logit maps to its own probability.
    probs = torch.sigmoid(outputs.logits.flatten())
    return probs.numpy().tolist()
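
# Example (mirroring the pairing used in perform_cyberbullying_analysis below):
#   tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
#   model = AutoModelForSequenceClassification.from_pretrained('kingsotn/finetuned_cyberbullying')
#   predict_cyberbullying_probability("some text", tokenizer, model)
#   # -> one float in [0, 1] per label in labels[1:]
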
# @st.cache  # left disabled; re-enabling it (or st.cache_resource on newer
# Streamlit releases) would avoid reloading the model on every submit
def perform_cyberbullying_analysis(tweet):
    with st.spinner(text="loading model, wait until spinner ends..."):
        model = AutoModelForSequenceClassification.from_pretrained('kingsotn/finetuned_cyberbullying')
        tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

        # One row for the tweet text, one column per toxicity label (global `labels`)
        df = pd.DataFrame({'comment': [tweet]})
        list_probs = predict_cyberbullying_probability(tweet, tokenizer, model)
        for i, label in enumerate(labels[1:]):
            df[label] = list_probs[i]
        return df

def perform_default_analysis(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    clf = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, framework="pt")

    tweet = st.text_area(label="Enter Text:", value="I'm nice at ping pong")
    submitted = st.form_submit_button("Analyze")
    if submitted:
        # loading spinner while the pipeline runs
        with st.spinner(text="loading..."):
            out = clf(tweet)
            st.json(out)  # e.g. [{'label': 'POSITIVE', 'score': 0.99}]

        # Label names vary by model: sst-2 uses POSITIVE/NEGATIVE, bertweet uses POS/NEG/NEU
        if out[0]["label"] in ("POSITIVE", "POS"):
            st.balloons()
            # prompt = f"{basic_prompt}\n\nThe user wrote a tweet that says: {tweet}, compliment them on how nice of a person they are! Remember, try to be as cringe and awkward as possible!"
            # response = generator(prompt, max_length=1000)[0]
            st.success("nice tweet!")
        else:
            # prompt = f"{basic_prompt}\n\nThe user wrote a tweet that says: {tweet}, tell them how terrible of a person they are! Remember, try to be as cringe and awkward as possible!"
            # response = generator(prompt, max_length=1000)[0]
            st.error("bad tweet!")

# main -->
st.title("Toxic Tweets Analyzer")
st.write(
    "💡 Toxic Tweets Analyzer helps you estimate how likely a tweet (or any text) is to be "
    "toxic, abusive, or cyberbullying. The app offers several pre-trained models to choose "
    "from, each with its own strengths and limitations; kingsotn/finetuned_cyberbullying is a "
    "finetuned DistilBERT. The selected model analyzes the text you enter and computes a "
    "probability for each label: toxic, severe_toxic, obscene, threat, insult, and "
    "identity_hate. Scores range from 0 to 1, with 1 meaning the label is almost certainly "
    "present in the tweet. The output is a table of per-label probabilities, giving you a "
    "picture of the tweet's toxicity. This can help identify and prevent cyberbullying and "
    "other forms of online abuse."
)
image = "kanye_loves_tweet.jpg"
st.image(image, use_column_width=True)
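
# First entry names the text column; the remaining six are assumed to match the
# order of the model's output logits (see predict_cyberbullying_probability).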
labels = ['comment', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

with st.form("my_form"):
    # select model
    model_name = st.selectbox(
        "Enter a text and select a pre-trained model to get the sentiment analysis",
        ["kingsotn/finetuned_cyberbullying",
         "distilbert-base-uncased-finetuned-sst-2-english",
         "finiteautomata/bertweet-base-sentiment-analysis",
         "distilbert-base-uncased"],
    )

    if model_name == "kingsotn/finetuned_cyberbullying":
        default = "I'm not even going to lie to you. I love me so much right now."
        tweet = st.text_area(label="Enter Text:", value=default)
        submitted = st.form_submit_button("Analyze textbox")
        random_tweet = st.form_submit_button("Get a random 😈😈😈 tweet (warning!!)")
        kanye = st.form_submit_button("Get a ye quote 🐻🎤🎧🎶")

        if random_tweet:
            # Pick a random sample from the bundled comments list
            tweet = comments.comments[randint(0, len(comments.comments) - 1)]
            st.write(tweet)
            submitted = True

        if kanye:
            response = requests.get('https://api.kanye.rest/')
            if response.status_code == 200:
                data = response.json()
                tweet = data['quote']
            else:
                st.error("Error getting Kanye quote | status code: " + str(response.status_code))
            st.write(tweet)
            submitted = True

        if submitted:
            df = perform_cyberbullying_analysis(tweet)
            st.table(df)
    else:
        perform_default_analysis(model_name)
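
# Note: `comments` (imported above) is a separate local module not shown in this
# file. A minimal sketch of what it is assumed to look like:
#
#     # comments.py
#     comments = [
#         "example offensive tweet 1",
#         "example offensive tweet 2",
#         # ... (the original code indexed randint(0, 354), implying 355+ entries)
#     ]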