mariaoliv committed on
Commit
11d3763
1 Parent(s): 388ba39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -15
app.py CHANGED
@@ -1,26 +1,109 @@
1
- import numpy as np
 
 
 
 
2
  import pandas as pd
3
- from transformers import pipeline
4
- from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
5
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- classifier = pipeline("sentiment-analysis")
8
 
9
- model_name ="distilbert-base-uncased-finetuned-sst-2-english"
 
10
 
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
- model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
 
 
 
 
 
13
 
14
- classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
 
 
 
15
 
 
 
 
 
16
 
17
- print(classifier("Thanks a lot for watching the video. Really appreciate it"))
 
18
 
19
- st.title("Sentiment Analysis of a Tweet")
20
- st.write("Enter text for sentiment analysis")
21
- tweet = st.text_input(label="Tweet Text")
22
 
23
 
24
- if(st.button("Analyze")):
25
- sentiment = classifier(tweet)
26
- st.write(sentiment[0]['label'])
 
1
+ import tensorflow as tf
2
+ import torch
3
+ import torch.nn.functional as F
4
+ from torch.utils.data import Dataset
5
+ #from transformers import BertTokenizer #, BertForSequenceClassification
6
  import pandas as pd
7
+ import numpy as np
 
8
  import streamlit as st
9
+ from sklearn.model_selection import train_test_split
10
+ from transformers import pipeline
11
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
12
+ from transformers import Trainer, TrainingArguments
13
+
14
+ #------Test text for the Streamlit form input
15
+ #I just learned how to suck up to people. You're very good at it, FisherQueen. As for the grammer, it should be obvious that they're typos, now pick out a mistake here, bitch!
16
+
17
# Location of a locally saved fine-tuned checkpoint file.
PATH = 'C:/Users/maria/Downloads/bert_base_uncased_fine_tuned_model.pth'

# Default base checkpoint name; later used to load the tokenizer.
BERT_MODEL_NAME = 'distilbert-base-uncased'

# Label names, in the order used to build the index <-> name lookup tables.
LABEL_COLUMNS = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
labels = LABEL_COLUMNS
id2label = dict(enumerate(labels))
label2id = {name: position for position, name in id2label.items()}

# Hub account prefix and the suffix shared by the fine-tuned model names.
USER = 'mariasandu/'
SAVED_MODEL_NAME_ENDING = '-for-toxic-comments-clf'
35
+
36
st.sidebar.header("Choose Model First")

# The two fine-tuned checkpoints offered in the sidebar.
_bert_cased_choice = 'bert-base-cased' + SAVED_MODEL_NAME_ENDING
_distilbert_choice = 'distilbert-base-uncased' + SAVED_MODEL_NAME_ENDING

model_name = st.sidebar.selectbox(
    "Select Model",
    (_bert_cased_choice, _distilbert_choice),
)

# The tokenizer is loaded from the base checkpoint name; only the cased BERT
# choice overrides the name BERT_MODEL_NAME already holds.
if model_name == _bert_cased_choice:
    BERT_MODEL_NAME = 'bert-base-cased'

tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)

# Echo the current selection back in the sidebar.
st.sidebar.write('Selected Model:')
st.sidebar.write(model_name)
52
+
53
import os

# Load the selected fine-tuned classifier from the Hub.
# Never hard-code the access token in source: read it from the HF_TOKEN
# environment variable (e.g. a Space secret) instead. When the variable is
# unset, None is passed and the download is attempted without a token.
saved_model = AutoModelForSequenceClassification.from_pretrained(
    USER + model_name,
    use_auth_token=os.environ.get("HF_TOKEN"),
)
55
+
56
def get_text_toxiccom(text):
    """Score ``text`` against every label with the loaded classifier.

    Uses the module-level ``tokenizer``, ``saved_model`` and ``labels``.

    Args:
        text: The comment to classify.

    Returns:
        A ``(pred_prob_list, predicted_labels)`` tuple. ``pred_prob_list``
        holds one sigmoid probability per model output, in output order.
        ``predicted_labels`` has the same length and holds the label name
        where the probability is at least 0.5 and ``'-----'`` otherwise.
    """
    # truncation=True caps the input at the tokenizer's maximum length so an
    # over-long comment cannot overflow the model's position embeddings.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(saved_model.device) for k, v in encoding.items()}

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = saved_model(**encoding).logits

    # Each logit goes through its own sigmoid and is thresholded on its own,
    # so any number of labels can be flagged for one comment.
    probs = torch.sigmoid(logits.squeeze().cpu())
    pred_prob_list = probs.tolist()

    predicted_labels = [
        labels[idx] if prob >= 0.5 else '-----'
        for idx, prob in enumerate(pred_prob_list)
    ]

    return pred_prob_list, predicted_labels
82
 
 
83
 
84
st.title('Toxic Comments Application')
st.write('Welcome to my multi label classification app!')

# A form batches the text area and the button so the script only acts on an
# explicit submit.
form = st.form(key='toxic_comments--form')
user_input = form.text_area('Enter your text')
submit = form.form_submit_button('Submit')

if submit:
    text = user_input

    if not text.strip():
        # Scoring an empty comment would only display meaningless numbers.
        st.warning('Please enter some text to classify.')
    else:
        problst, labellst = get_text_toxiccom(text)

        # One row per label: its name, its probability, and the label name
        # again when it was predicted ('-----' otherwise).
        results = {
            'LABELS': LABEL_COLUMNS,
            'PROBABILITY': problst,
            'PREDICTED_LABELS': labellst,
        }

        outdf = pd.DataFrame.from_dict(results)
        st.write(outdf)
107
 
 
 
 
108
 
109