import streamlit as st
import torch
import torch.nn.functional as F  # torch / F are only referenced by the commented-out experiments below
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertForMaskedLM, pipeline

# Summarization checkpoint loaded with a masked-LM head. None of the branches below
# use it (the Pipeline branch loads its own model), so it only adds startup time.
model = BertForMaskedLM.from_pretrained("remi/bertabs-finetuned-extractive-abstractive-summarization")

textIn = st.text_input("Input Text Here:", "I really like the color of your car!")

option = st.selectbox(
    'Which pre-trained model would you like for your sentiment analysis?',
    ('MILESTONE 3', 'Pipeline', 'TextBlob'),
)

st.write('You selected:', option)

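# Three backends: the fine-tuned MILESTONE 3 checkpoint, an off-the-shelf
# transformers pipeline, and TextBlob's rule-based sentiment scores.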
if option == 'MILESTONE 3':
    # First fine-tuned MILESTONE 3 checkpoint. The model and tokenizer are loaded from
    # the same repo so they stay paired (the original mixed milestone3_0 and milestone3_1),
    # and the checkpoint is loaded with a sequence-classification head because that is
    # what the sentiment-analysis pipeline expects (assuming Rathgeberj/milestone3_0
    # provides that head).
    model_name_0 = "Rathgeberj/milestone3_0"
    model_0 = AutoModelForSequenceClassification.from_pretrained(model_name_0)
    tokenizer_0 = AutoTokenizer.from_pretrained(model_name_0)
    classifier_0 = pipeline(task="sentiment-analysis", model=model_0, tokenizer=tokenizer_0)

    # model_name_1 = "Rathgeberj/milestone3_1"
    # model_1 = AutoModelForSequenceClassification.from_pretrained(model_name_1)
    # tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)
    # classifier_1 = pipeline(task="sentiment-analysis", model=model_1, tokenizer=tokenizer_1)

    # model_name_2 = "Rathgeberj/milestone3_2"
    # model_2 = AutoModelForSequenceClassification.from_pretrained(model_name_2)
    # tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
    # classifier_2 = pipeline(task="sentiment-analysis", model=model_2, tokenizer=tokenizer_2)

    # model_name_3 = "Rathgeberj/milestone3_3"
    # model_3 = AutoModelForSequenceClassification.from_pretrained(model_name_3)
    # tokenizer_3 = AutoTokenizer.from_pretrained(model_name_3)
    # classifier_3 = pipeline(task="sentiment-analysis", model=model_3, tokenizer=tokenizer_3)

    # model_name_4 = "Rathgeberj/milestone3_4"
    # model_4 = AutoModelForSequenceClassification.from_pretrained(model_name_4)
    # tokenizer_4 = AutoTokenizer.from_pretrained(model_name_4)
    # classifier_4 = pipeline(task="sentiment-analysis", model=model_4, tokenizer=tokenizer_4)

    # model_name_5 = "Rathgeberj/milestone3_5"
    # model_5 = AutoModelForSequenceClassification.from_pretrained(model_name_5)
    # tokenizer_5 = AutoTokenizer.from_pretrained(model_name_5)
    # classifier_5 = pipeline(task="sentiment-analysis", model=model_5, tokenizer=tokenizer_5)

    # models = [model_0, model_1, model_2, model_3, model_4, model_5]
    # tokenizers = [tokenizer_0, tokenizer_1, tokenizer_2, tokenizer_3, tokenizer_4, tokenizer_5]
    # classifiers = [classifier_0, classifier_1, classifier_2, classifier_3, classifier_4, classifier_5]

    # Classify the input text with the MILESTONE 3 model and report the top prediction.
    preds = classifier_0(textIn)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    st.write('According to MILESTONE 3, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])

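# Off-the-shelf DistilBERT checkpoint fine-tuned on SST-2, wrapped in the
# standard transformers sentiment-analysis pipeline.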
if option == 'Pipeline':

    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)
    preds = classifier(textIn)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    st.write('According to Pipeline, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])

if option == 'TextBlob':
    # Rule-based sentiment from TextBlob: polarity in [-1, 1], subjectivity in [0, 1].
    blob = TextBlob(textIn)
    polarity = blob.sentiment.polarity
    subjectivity = blob.sentiment.subjectivity
    if polarity < 0:
        sentiment = 'Negative'
    elif polarity == 0:
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'
    st.write('According to TextBlob, input text is ', sentiment, ' with a subjectivity score (0 = objective, 1 = subjective) of ', subjectivity)
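
# Optional: the model loads above run again on every Streamlit interaction. If the
# installed Streamlit version provides st.cache_resource (added in 1.18), a cached
# helper avoids the repeated loads. A minimal sketch, using a hypothetical
# load_classifier helper:
#
# @st.cache_resource
# def load_classifier(name):
#     tok = AutoTokenizer.from_pretrained(name)
#     mdl = AutoModelForSequenceClassification.from_pretrained(name)
#     return pipeline(task="sentiment-analysis", model=mdl, tokenizer=tok)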


#------------------------------------------------------------------------

# tokens = tokenizer.tokenize(textIn)
# token_ids = tokenizer.convert_tokens_to_ids(tokens)
# input_ids = tokenizer(textIn)


# X_train = [textIn]

# batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
# # batch = torch.tensor(batch["input_ids"])

# with torch.no_grad():
#     outputs = model(**batch, labels=torch.tensor([1, 0]))
#     predictions = F.softmax(outputs.logits, dim=1)
#     labels = torch.argmax(predictions, dim=1)
#     labels = [model.config.id2label[label_id] for label_id in labels.tolist()]

# save_directory = "saved"
# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)

# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSequenceClassification.from_pretrained(save_directory)

#------------------------------------------------------------------------