File size: 2,086 Bytes
2829bae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import streamlit as st
import torch
import pandas as pd
import numpy as np
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

st.title('Sentiment Analysis with Streamlit')

# Pre-fill the text box with the contents of tweet.txt. Trailing whitespace is
# stripped from every line and the lines are concatenated without a separator.
# A missing file is not fatal: the text box simply starts out empty.
try:
    with open("tweet.txt", encoding="utf-8") as file:
        speech = "".join(line.rstrip() for line in file)
except FileNotFoundError:
    speech = ""

data = st.text_area(label="Text for Sentiment Analysis", value=speech)

# Hugging Face model identifiers offered in the selector below.
models = [
    "sachiniyer/tweet_toxicity",
    "distilbert-base-uncased-finetuned-sst-2-english",
    "Ghost1/bert-base-uncased-finetuned_for_sentiment_analysis1-sst2",
    "Seethal/sentiment_analysis_generic_dataset",
    "sbcBI/sentiment_analysis_model",
    "juliensimon/reviews-sentiment-analysis",
]

model_name = st.selectbox('Which model do you want to use', models)

# Label names looked up by score position in get_dict.
labels = [
    "toxic",
    "severe toxic",
    "obscene",
    "threat",
    "insult",
    "identity hate",
]

def score(item):
    """Return the value stored under the ``'score'`` key of *item*."""
    value = item['score']
    return value

def get_tokens(data, model):
    """Tokenize *data* into PyTorch tensors for *model*.

    Args:
        data: Text to tokenize.
        model: Model the tokens are meant for. Its ``name_or_path`` attribute,
            when present and non-empty, selects the tokenizer checkpoint;
            otherwise "sachiniyer/tweet_toxicity" is used.

    Returns:
        The tokenizer output, requested with ``return_tensors="pt"``.
    """
    # Use the tokenizer that belongs to the model actually passed in, falling
    # back to the checkpoint this function was originally hard-wired to.
    checkpoint = getattr(model, "name_or_path", None) or "sachiniyer/tweet_toxicity"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # Truncate over-long text to the tokenizer's maximum length so that the
    # model is not handed more positions than it supports.
    return tokenizer(data, return_tensors="pt", truncation=True)

def get_out(tokens, model):
    """Run *model* on *tokens* and return its output.

    Args:
        tokens: Mapping of keyword arguments forwarded to the model call.
        model: Callable invoked as ``model(**tokens)``.

    Returns:
        Whatever the model call returns.
    """
    # This is inference only, so skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        return model(**tokens)

def get_perc(output):
    """Convert the logits of *output* into sigmoid scores for the first row.

    Args:
        output: Object exposing a ``logits`` tensor.

    Returns:
        A NumPy array holding the element-wise sigmoid of ``output.logits``,
        indexed at position 0 along the first axis.
    """
    scores = torch.sigmoid(output.logits).detach()
    # .numpy() only works on CPU tensors; .cpu() is a no-op when the tensor is
    # already there and makes the conversion safe for other devices.
    return scores.cpu().numpy()[0]

def get_dict(percs, data):
    """Build a result row naming the two highest-scoring labels.

    Args:
        percs: Array of scores; positions are looked up in the module-level
            ``labels`` list.
        data: The analysed text, stored under the "text" key.

    Returns:
        A dict with the text, the top label and its score rounded to three
        decimals (as a string), followed by the same for the runner-up.
    """
    # argsort is ascending, so the last two entries are the largest scores.
    top_two = np.argsort(percs)[-2:]
    best = top_two[1]
    second = top_two[0]

    row = {}
    row["text"] = data
    row["label 1"] = labels[best]
    row["perc 1"] = str(round(percs[best], 3))
    row["label 2"] = labels[second]
    row["perc 2"] = str(round(percs[second], 3))
    return row

def get(data, model):
    """Run the tokenize, forward, score and summarise steps for *data*.

    Args:
        data: Text to analyse.
        model: Model passed to ``get_tokens`` and ``get_out``.

    Returns:
        A one-row ``pandas.DataFrame`` built from the dict that ``get_dict``
        produces.
    """
    model_output = get_out(get_tokens(data, model), model)
    summary_row = get_dict(get_perc(model_output), data)
    return pd.DataFrame([summary_row])

# Run the selected model only when the user presses the button.
if st.button('Run model'):
    if model_name == "sachiniyer/tweet_toxicity":
        # Toxicity model: show the two highest-scoring labels in a table.
        model = AutoModelForSequenceClassification.from_pretrained("sachiniyer/tweet_toxicity")
        d = get(data, model)
        st.table(d)
    else:
        generator = pipeline(model=model_name)
        # Classify the text the user entered, not the model identifier string.
        st.markdown(generator(data))