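# Streamlit demo comparing three sentiment-analysis backends on a user-supplied sentence:
# a fine-tuned "MILESTONE 3" model, an off-the-shelf Hugging Face pipeline, and TextBlob.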
import streamlit as st
from textblob import TextBlob
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import torch.nn.functional as F  # torch / F are only used by the commented-out experiments at the bottom
# model = BertForMaskedLM.from_pretrained("remi/bertabs-finetuned-extractive-abstractive-summarization")  # unused summarization checkpoint
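
# UI: a text box for the input sentence and a dropdown to pick which sentiment-analysis backend to run.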
textIn = st.text_input("Input Text Here:", "I really like the color of your car!")
option = st.selectbox(
    'Which pre-trained model would you like for your sentiment analysis?',
    ('MILESTONE 3', 'Pipeline', 'TextBlob'))
st.write('You selected:', option)
if option == 'MILESTONE 3':
    st.write('test1')
    # Load the fine-tuned checkpoint and wrap it in a sentiment-analysis pipeline.
    model_name_0 = "Rathgeberj/milestone3_1"
    model_0 = AutoModelForSequenceClassification.from_pretrained(model_name_0)
    # model_0 = BertForMaskedLM.from_pretrained("Rathgeberj/milestone3_0")  # masked-LM head; not usable with the sentiment-analysis pipeline
    tokenizer_0 = AutoTokenizer.from_pretrained(model_name_0)
    classifier_0 = pipeline(task="sentiment-analysis", model=model_0, tokenizer=tokenizer_0)
    # model_name_1 = "Rathgeberj/milestone3_1"
    # model_1 = AutoModelForSequenceClassification.from_pretrained(model_name_1)
    # tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)
    # classifier_1 = pipeline(task="sentiment-analysis", model=model_1, tokenizer=tokenizer_1)
    # model_name_2 = "Rathgeberj/milestone3_2"
    # model_2 = AutoModelForSequenceClassification.from_pretrained(model_name_2)
    # tokenizer_2 = AutoTokenizer.from_pretrained(model_name_2)
    # classifier_2 = pipeline(task="sentiment-analysis", model=model_2, tokenizer=tokenizer_2)
    # model_name_3 = "Rathgeberj/milestone3_3"
    # model_3 = AutoModelForSequenceClassification.from_pretrained(model_name_3)
    # tokenizer_3 = AutoTokenizer.from_pretrained(model_name_3)
    # classifier_3 = pipeline(task="sentiment-analysis", model=model_3, tokenizer=tokenizer_3)
    # model_name_4 = "Rathgeberj/milestone3_4"
    # model_4 = AutoModelForSequenceClassification.from_pretrained(model_name_4)
    # tokenizer_4 = AutoTokenizer.from_pretrained(model_name_4)
    # classifier_4 = pipeline(task="sentiment-analysis", model=model_4, tokenizer=tokenizer_4)
    # model_name_5 = "Rathgeberj/milestone3_5"
    # model_5 = AutoModelForSequenceClassification.from_pretrained(model_name_5)
    # tokenizer_5 = AutoTokenizer.from_pretrained(model_name_5)
    # classifier_5 = pipeline(task="sentiment-analysis", model=model_5, tokenizer=tokenizer_5)
    # models = [model_0, model_1, model_2, model_3, model_4, model_5]
    # tokenizers = [tokenizer_0, tokenizer_1, tokenizer_2, tokenizer_3, tokenizer_4, tokenizer_5]
    # classifiers = [classifier_0, classifier_1, classifier_2, classifier_3, classifier_4, classifier_5]
    # X_train = [textIn]
    # batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
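    # Possible completion (untested sketch, mirroring the Pipeline branch below): run the
    # active classifier on the input text and display its prediction.
    # preds_0 = classifier_0(textIn)
    # st.write('MILESTONE 3 predicts ', preds_0[0]['label'], ' with a confidence of ', round(preds_0[0]['score'], 4))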
    st.write('test2')
if option == 'Pipeline':
    # Off-the-shelf DistilBERT checkpoint fine-tuned on SST-2, wrapped in a sentiment-analysis pipeline.
    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    classifier = pipeline(task="sentiment-analysis", model=model, tokenizer=tokenizer)
    preds = classifier(textIn)
    preds = [{"score": round(pred["score"], 4), "label": pred["label"]} for pred in preds]
    st.write('According to Pipeline, input text is ', preds[0]['label'], ' with a confidence of ', preds[0]['score'])
if option == 'TextBlob':
    # Rule-based sentiment: polarity in [-1, 1], subjectivity in [0, 1].
    polarity = TextBlob(textIn).sentiment.polarity
    subjectivity = TextBlob(textIn).sentiment.subjectivity
    sentiment = ''
    if polarity < 0:
        sentiment = 'Negative'
    elif polarity == 0:
        sentiment = 'Neutral'
    else:
        sentiment = 'Positive'
    st.write('According to TextBlob, input text is ', sentiment, ' with a subjectivity score (from 0 being objective to 1 being subjective) of ', subjectivity)
#------------------------------------------------------------------------
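# Leftover experimentation code, kept commented out: manual tokenization, a raw forward pass
# with softmax over the logits, and saving/reloading the model and tokenizer locally.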
# tokens = tokenizer.tokenize(textIn)
# token_ids = tokenizer.convert_tokens_to_ids(tokens)
# input_ids = tokenizer(textIn)
# X_train = [textIn]
# batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
# batch = torch.tensor(batch["input_ids"])
# with torch.no_grad():
# outputs = model(**batch, labels=torch.tensor([1, 0]))
# predictions = F.softmax(outputs.logits, dim=1)
# labels = torch.argmax(predictions, dim=1)
# labels = [model.config.id2label[label_id] for label_id in labels.tolist()]
# save_directory = "saved"
# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)
# tokenizer = AutoTokenizer.from_pretrained(save_directory)
# model = AutoModelForSequenceClassification.from_pretrained(save_directory)
#------------------------------------------------------------------------