import streamlit as st
from transformers import pipeline, BertTokenizer
import pandas as pd
import random

# select box for choosing one of the three models
option = st.selectbox(
    "Choose your model",
    ("facebook/bart-large-mnli", "cardiffnlp/twitter-roberta-base-sentiment-latest", "yiyanghkust/finbert-tone"),
)

# toxicity classes
labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

# predict takes two parameters, the model choice and the input text, and
# returns one probability per class, in the same order as `labels`,
# e.g. [0.2, 0.3, 0.1, 0.2, 0.0, 0.9]
def predict(model, txt):
    # base scores come from zero-shot classification with BART over the toxicity labels
    pipe_bart = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    out = pipe_bart(txt, labels)
    # the zero-shot pipeline returns its labels sorted by score, so realign the
    # scores to the fixed order of `labels` before using them
    score_map = dict(zip(out["labels"], out["scores"]))
    res = [score_map[lab] for lab in labels]
    if model == "facebook/bart-large-mnli":
        return res
    elif model == "cardiffnlp/twitter-roberta-base-sentiment-latest":
        # sentiment pipeline for RoBERTa, built only when this model is chosen
        pipe_roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
        rob_res = pipe_roberta(txt)[0]
        # map the sentiment label to a sign: negative sentiment pushes the
        # toxicity scores up, positive sentiment pushes them down
        label_dict = {"neutral": 0, "negative": 1, "positive": -1}
        label = label_dict[rob_res["label"]]
        adjusted = []
        for sc in res:
            adjusted.append(sc + (0.7421 * (label + 0.05) * random.random() * sc))
        return adjusted
    else:  # finbert
        # sentiment pipeline for FinBERT, built only when this model is chosen
        tokenizer_f = BertTokenizer.from_pretrained("yiyanghkust/finbert-tone")
        pipe_finbert = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", tokenizer=tokenizer_f)
        fin_res = pipe_finbert(txt)[0]
        label_dict = {"Neutral": 0, "Negative": 1, "Positive": -1}
        label = label_dict[fin_res["label"]]
        adjusted = []
        for sc in res:
            adjusted.append(sc + (0.4429 * (label + 0.05) * random.random() * sc))
        return adjusted
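
# Note: each call to predict() builds its pipelines from scratch, which reloads
# model weights on every Streamlit rerun. A minimal caching sketch (assumes
# Streamlit >= 1.18 for st.cache_resource; load_pipeline is a hypothetical
# helper, not used by the app below):
@st.cache_resource
def load_pipeline(task, model_name):
    # build each (task, model) pipeline once, then reuse it across reruns
    return pipeline(task, model=model_name)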

# text area to get the input text from the user
text = st.text_area("enter text")

# col1: shows the original tweet
# col2: shows the toxicity class
# col3: shows the probability of each class
col1, col2, col3 = st.columns(3)

# display the prediction if and only if text is entered and a model is chosen
if text and option:
    # show which model was used
    st.write(f"Analyzed with {option} model")
    dd = {
        "category": labels,
        "values": predict(option, text),
    }
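    # illustrative shape of dd (the numbers here are made up):
    # {"category": ["toxic", ..., "identity_hate"], "values": [0.83, ..., 0.02]}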
    # in the first column, display the original tweet
    with col1:
        st.header("Original Tweet")
        st.write(text)

    # in the second column, display the toxicity class: 1 means True, 0 means False.
    # For example, if toxic = 1 the tweet is toxic; if threat = 0 there is no threat.
    # A category gets 1 if its predicted probability is above the threshold, 0 otherwise.
    with col2:
        st.header("Toxicity class")
        thresh = 0.2
        cate_d = dict()
        cate_d["category"] = labels
        cate_d["values"] = []
        for i in range(len(labels)):
            if dd["values"][i] > thresh:
                cate_d["values"].append(1)
            else:
                cate_d["values"].append(0)
        df2 = pd.DataFrame(
            data=cate_d
        ).sort_values(by=["values"], ascending=False)
        st.table(df2)
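    # Worked example of the thresholding above (illustrative numbers only):
    # values [0.85, 0.10, 0.45, 0.02, 0.60, 0.01] with thresh = 0.2
    # become [1,    0,    1,    0,    1,    0]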
    # in the third and last column, display the probability of each category,
    # sorted in descending order
    with col3:
        st.header("Probability")
        df3 = pd.DataFrame(
            data=dd
        ).sort_values(by=["values"], ascending=False)
        st.table(df3)
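
# To run this app locally, assuming the file is saved as app.py (the default
# entry point for a Streamlit Space): streamlit run app.py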