# toxic-tweets/app.py
import streamlit as st
import torch
import pandas as pd
import numpy as np
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
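
# Streamlit app: classify the contents of the text box either with the
# sachiniyer/tweet_toxicity multi-label toxicity model or with one of several
# Hugging Face sentiment-analysis pipelines.
# Run locally with: streamlit run app.py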
st.title('Sentiment Analysis with Streamlit')
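
# Default text for the box comes from tweet.txt, collapsed onto a single line.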
speech = ""
with open("tweet.txt") as file:
    speech = "".join(line.rstrip() for line in file)
data = st.text_area(label="Text for Sentiment Analysis", value=speech)
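
# Models selectable in the UI; the first is the custom toxicity classifier,
# the rest are generic sentiment-analysis checkpoints from the Hugging Face Hub.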
models = ["sachiniyer/tweet_toxicity",
          "distilbert-base-uncased-finetuned-sst-2-english",
          "Ghost1/bert-base-uncased-finetuned_for_sentiment_analysis1-sst2",
          "Seethal/sentiment_analysis_generic_dataset",
          "sbcBI/sentiment_analysis_model",
          "juliensimon/reviews-sentiment-analysis"]
model_name = st.selectbox(
    'Which model do you want to use?',
    models)
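
# Label names for the toxicity model's six outputs, indexed in the same order
# as its logits.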
labels = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
def score(item):
    return item['score']
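
# Tokenize the input text; the tokenizer is hard-coded to the
# sachiniyer/tweet_toxicity checkpoint, the only model routed through this path.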
def get_tokens(data, model):
    tokenizer = AutoTokenizer.from_pretrained("sachiniyer/tweet_toxicity")
    tokens = tokenizer(data, return_tensors="pt")
    return tokens
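
# Run a forward pass of the classification model on the tokenized input.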
def get_out(tokens, model):
    output = model(**tokens)
    return output
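
# Sigmoid over the logits gives an independent probability per label
# (multi-label classification, so the probabilities need not sum to 1).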
def get_perc(output):
    return torch.sigmoid(output.logits).detach().numpy()[0]
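
# Keep the two highest-probability labels and format them into a table row.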
def get_dict(percs, data):
    sorted_indices = np.argsort(percs)[-2:]
    row = {"text": data,
           "label 1": labels[sorted_indices[1]],
           "perc 1": str(round(percs[sorted_indices[1]], 3)),
           "label 2": labels[sorted_indices[0]],
           "perc 2": str(round(percs[sorted_indices[0]], 3))}
    return row
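
# Full toxicity pipeline: tokenize, run the model, convert logits to
# probabilities, and return a one-row DataFrame with the top two labels.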
def get(data, model):
    tokens = get_tokens(data, model)
    output = get_out(tokens, model)
    percs = get_perc(output)
    d = get_dict(percs, data)
    return pd.DataFrame([d])
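
# Illustrative shape of the one-row table rendered for the toxicity path
# (the numbers below are made up for illustration only):
#   text            | label 1 | perc 1 | label 2 | perc 2
#   "example tweet" | toxic   | 0.912  | insult  | 0.654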
if st.button('Run model'):
    if model_name == "sachiniyer/tweet_toxicity":
        model = AutoModelForSequenceClassification.from_pretrained("sachiniyer/tweet_toxicity")
        d = get(data, model)
        st.table(d)
    else:
        # The remaining models are plain sentiment-analysis pipelines; run them
        # on the user's input text.
        generator = pipeline(model=model_name)
        st.markdown(generator(data))