import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
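# Streamlit demo: classify a fixed example sentence with a selectable
# pre-trained model from the Hugging Face Hub and display the predicted label.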

st.title("Milestone #3 offensive statement prediction with pre-trained models")
st.write("in this basic demo you can select a model to judge whether or not the text below is offensive and how it's offensive. the options are[toxic, severe_toxic, obscene, threat, insult, identity_hate]")
text = "I am going to mess you up"
st.write(text)

options = ["Greys/milestonemodel"]
model = st.selectbox("Select a  pre-trained model", options)

con = st.button("Submit")
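# Run inference only after the user presses Submit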
if con:
  if model == "zero-shot-classification":
    # Generic zero-shot pipeline scored against a single candidate label
    classifier = pipeline(model)
    res = classifier(text, candidate_labels=["offensive"])
    label = res['labels'][0]
    score = res['scores'][0]
    st.write(f"Prediction: {label}, Score: {score*100:.1f}% chance")
  
  if model == "cardiffnlp/twitter-roberta-base-offensive":
    # Text-classification pipeline backed by the RoBERTa offensive-language model
    classifier = pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-offensive', tokenizer='cardiffnlp/twitter-roberta-base-offensive')
    result = classifier(text)
    label = result[0]['label']
    score = result[0]['score']
    st.write(f"Prediction: {label}, Score: {score*100:.1f}% chance")
    
  if model == "Greys/milestonemodel":
    # Fine-tuned DistilBERT checkpoint, loaded directly so the raw class probabilities can be inspected
    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
    clf = AutoModelForSequenceClassification.from_pretrained("Greys/milestonemodel")
    my_list = [' ', 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

    def classify_sentence(text):
      # Tokenize the input, run the model, and convert the logits to class probabilities
      inputs = tokenizer(text, return_tensors="pt")
      outputs = clf(**inputs)
      probs = outputs.logits.softmax(dim=1)
      return probs.detach().numpy()[0]

    def find_largest_number(numbers):
      # Manual argmax: return the index of the largest probability
      max_num = numbers[0]
      max_index = 0
      for i in range(1, len(numbers)):
        if numbers[i] > max_num:
          max_num = numbers[i]
          max_index = i
      return max_index

    probs = classify_sentence(text)
    print(probs)  # debug output to the server console
    index = find_largest_number(probs)
    st.write(my_list[index])
#id,toxic,severe_toxic,obscene,threat,insult,identity_hate
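# To try the demo locally (assuming this file is saved as app.py and that
# streamlit, transformers, and torch are installed): streamlit run app.py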