Spaces:
Runtime error
Runtime error
File size: 3,923 Bytes
55bdad4 480b6a8 55bdad4 03251df 2d7ed25 284129c 55bdad4 1ce306e ee38f19 55bdad4 03251df 96db83a a8e4a85 55bdad4 8b71a2e 55bdad4 ee38f19 55bdad4 a8e4a85 55bdad4 a8e4a85 55bdad4 2d7ed25 5a40ca9 55bdad4 284129c 2d7ed25 55bdad4 2d7ed25 55bdad4 2d7ed25 55bdad4 2d7ed25 55bdad4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import streamlit as st
from transformers import pipeline, DistilBertForSequenceClassification, DistilBertTokenizerFast, AutoModelForSequenceClassification, AutoTokenizer, TFAutoModelForSequenceClassification
# Names of the selectable sentiment-analysis models from the transformers
# library. The first two entries are picked out by position right below.
MODEL_OPTS = [
    'finetuned',
    'default',
    'bertweet-base-sentiment-analysis',
    'twitter-roberta-base',
    'distilRoberta-financial-sentiment',
]
# FINETUNED_OPT is the first option, DEFAULT_OPT the second.
FINETUNED_OPT, DEFAULT_OPT = MODEL_OPTS[:2]
# Label-to-index mapping for the HUPD 'decision' field. Despite its name, it
# maps the decision *string* to an integer class index.
# NOTE(review): values follow the example in the HUPD dataset card -- confirm
# they match the label order the finetuned model was trained with.
decision_to_str = {
    'REJECTED': 0,
    'ACCEPTED': 1,
    'PENDING': 2,
    'CONT-REJECTED': 3,
    'CONT-ACCEPTED': 4,
    'CONT-PENDING': 5,
}


# Helper function
def map_decision_to_string(example):
    """Return a one-key dict with the example's decision as a class index.

    Args:
        example: Mapping with a 'decision' key holding one of the keys of
            ``decision_to_str``.

    Returns:
        ``{'decision': <int>}``; no other keys of ``example`` are copied.

    Raises:
        KeyError: If 'decision' is missing or its value is not a known label.
    """
    return {'decision': decision_to_str[example['decision']]}
def load_abstracts():
    """Load the HUPD 'sample' configuration and return its training abstracts.

    Returns:
        A list with the 'abstract' column of the 'train' split.
    """
    # Imported here because the module-level imports do not bring
    # `load_dataset` into scope, which made this function fail with NameError.
    from datasets import load_dataset

    dataset_dict = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-31',
        val_filing_start_date='2016-01-01',
        val_filing_end_date='2016-01-01',
    )
    # Copy the column out BEFORE dropping the dataset: the previous code
    # rebound `dataset_dict` to [] first and then indexed that empty list.
    # list(...) also guarantees the caller gets a plain list.
    abstracts = list(dataset_dict['train']['abstract'])
    del dataset_dict  # free up space
    return abstracts
# Hub checkpoints for the options that load through the generic Auto* classes.
_AUTO_CHECKPOINTS = {
    'bertweet-base-sentiment-analysis': "finiteautomata/bertweet-base-sentiment-analysis",
    # Spelling offered in MODEL_OPTS.
    'twitter-roberta-base': "cardiffnlp/twitter-roberta-base-sentiment",
    # Spelling this function used to test for; kept so existing callers work.
    'twitter-roberta-base-sentiment': "cardiffnlp/twitter-roberta-base-sentiment",
    'distilRoberta-financial-sentiment': "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
}


# returns loaded model and tokenizer, if any
def load_model(opt):
    """Load the model and tokenizer for the selected option.

    Args:
        opt: One of the names in MODEL_OPTS (the legacy name
            'twitter-roberta-base-sentiment' is accepted as well).

    Returns:
        A ``(model, tokenizer)`` tuple. Both are None for DEFAULT_OPT and for
        an unrecognised option; in the latter case messages are printed.
    """
    if opt not in MODEL_OPTS and opt not in _AUTO_CHECKPOINTS:
        print("Incorrect model selection. Try again!")

    # The default option deliberately loads nothing.
    if opt == DEFAULT_OPT:
        return None, None

    # The finetuned model pairs the stock DistilBERT tokenizer with the
    # finetuned classification weights.
    if opt == FINETUNED_OPT:
        tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        model = DistilBertForSequenceClassification.from_pretrained('saccharinedreams/finetuned-distilbert-base-uncased-for-hupd')
        return model, tokenizer

    checkpoint = _AUTO_CHECKPOINTS.get(opt)
    if checkpoint is None:
        print("Model not loaded correctly. Try again!")
        return None, None

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return model, tokenizer
def sentiment_analysis(model, tokenizer):
    """Build a 'text-classification' pipeline.

    The given model and tokenizer are used only when both are truthy;
    otherwise the pipeline is created without them.
    """
    if not (model and tokenizer):
        return pipeline('text-classification')
    return pipeline('text-classification', model=model, tokenizer=tokenizer)
# Title the Streamlit app 'Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)'
st.title('Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)')
st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')

abstracts = load_abstracts()
# Without any abstracts there is nothing to select or classify; report it and
# end this script run instead of failing further down.
if not abstracts:
    st.error('No abstracts could be loaded from the HUPD dataset.')
    st.stop()

# Preselect the first abstract (index 0).
dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=0)
model, tokenizer = load_model(FINETUNED_OPT)

# Take in user input
#user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
# The user can interact with a dropdown menu to choose a sentiment analysis model.
#dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
#model, tokenizer = load_model(dropdown_value)

# Perform sentiment analysis on the selected abstract.
# truncation=True keeps abstracts longer than the model's maximum input length
# from raising inside the pipeline.
result = sentiment_analysis(model, tokenizer)(dropdown_abstracts, truncation=True)

# Display the sentiment analysis results
st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])
|