Spaces:

saccharinedreams
/

sentiment-analysis-app

Runtime error

File size: 3,923 Bytes

55bdad4
480b6a8
55bdad4
 
03251df
 
 
2d7ed25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284129c
55bdad4
 
 
1ce306e
ee38f19
55bdad4
 
03251df
 
 
96db83a
a8e4a85
55bdad4
 
 
 
 
 
8b71a2e
55bdad4
 
 
ee38f19
55bdad4
 
 
 
 
a8e4a85
55bdad4
a8e4a85
55bdad4
2d7ed25
 
5a40ca9
55bdad4
284129c
2d7ed25
 
 
55bdad4
2d7ed25
55bdad4
 
2d7ed25
 
 
55bdad4
 
2d7ed25
55bdad4

import streamlit as st
from transformers import pipeline, DistilBertForSequenceClassification, DistilBertTokenizerFast, AutoModelForSequenceClassification, AutoTokenizer, TFAutoModelForSequenceClassification

# Options for models from transformers library
# Selectable model names. The first entry is the fine-tuned model and the
# second is the transformers default; both are aliased right below.
MODEL_OPTS = [
    'finetuned',
    'default',
    'bertweet-base-sentiment-analysis',
    'twitter-roberta-base',
    'distilRoberta-financial-sentiment',
]
FINETUNED_OPT, DEFAULT_OPT = MODEL_OPTS[:2]

# Helper function
def map_decision_to_string(example):
    """Translate an example's 'decision' value through `decision_to_str`.

    NOTE(review): `decision_to_str` is not defined in the visible part of
    this file, so calling this helper as-is raises NameError -- confirm where
    the mapping is supposed to come from.

    Args:
        example: Mapping that contains a 'decision' key.

    Returns:
        A one-key dict, {'decision': <looked-up value>}.
    """
    lookup = decision_to_str
    translated = lookup[example['decision']]
    return {'decision': translated}

def load_abstracts():
    """Load the HUPD 'sample' configuration and return its training abstracts.

    The dataset is requested with train filing dates 2016-01-01 through
    2016-01-31 and validation filing dates restricted to 2016-01-01.

    The previous implementation rebound `dataset_dict` to an empty list
    *before* indexing it, so the return statement always raised TypeError.
    The abstracts are now extracted first and the dataset reference is
    dropped afterwards.

    NOTE(review): `load_dataset` is not imported in the visible part of this
    file. It is presumably `datasets.load_dataset`; confirm and add
    `from datasets import load_dataset` to the file's imports.

    Returns:
        list: The 'abstract' column of the 'train' split.
    """
    dataset_dict = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-31',
        val_filing_start_date='2016-01-01',
        val_filing_end_date='2016-01-01',
    )
    # Materialise as a plain list: the caller relies on list methods such as
    # .index(), and this must happen before the dataset reference is released.
    abstracts = list(dataset_dict['train']['abstract'])
    del dataset_dict  # free up space; only the abstracts are needed from here on
    return abstracts

# returns loaded model and tokenizer, if any
def load_model(opt):
    """Load the model and tokenizer that correspond to a model option.

    Args:
        opt: Name of the model to load, normally one of MODEL_OPTS. The
            option 'twitter-roberta-base' previously matched no branch (the
            code compared against 'twitter-roberta-base-sentiment'), so it
            silently loaded nothing; both spellings are accepted now.

    Returns:
        tuple: (model, tokenizer). Both are None when `opt` is DEFAULT_OPT,
        and also when `opt` is not recognised (a message is printed in that
        case), which makes `sentiment_analysis` fall back to the default
        pipeline.
    """
    model, tokenizer = None, None

    # The default option intentionally loads nothing.
    if opt == DEFAULT_OPT:
        return model, tokenizer

    # The fine-tuned model pairs the stock DistilBERT tokenizer with the
    # fine-tuned classification weights.
    if opt == FINETUNED_OPT:
        tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        model = DistilBertForSequenceClassification.from_pretrained('saccharinedreams/finetuned-distilbert-base-uncased-for-hupd')
        return model, tokenizer

    # Remaining options load tokenizer and model from one Hub checkpoint.
    hub_checkpoints = {
        'bertweet-base-sentiment-analysis': "finiteautomata/bertweet-base-sentiment-analysis",
        'twitter-roberta-base': "cardiffnlp/twitter-roberta-base-sentiment",
        'twitter-roberta-base-sentiment': "cardiffnlp/twitter-roberta-base-sentiment",
        'distilRoberta-financial-sentiment': "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis",
    }
    checkpoint = hub_checkpoints.get(opt)

    if checkpoint is None:
        if opt in MODEL_OPTS:
            # Listed as selectable but no checkpoint is registered for it.
            print("Model not loaded correctly. Try again!")
        else:
            print("Incorrect model selection. Try again!")
        return model, tokenizer

    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return model, tokenizer

def sentiment_analysis(model, tokenizer):
    """Build a 'text-classification' pipeline.

    Args:
        model: Model to hand to the pipeline, or a falsy value.
        tokenizer: Tokenizer to hand to the pipeline, or a falsy value.

    Returns:
        A pipeline that uses the given model and tokenizer when both are
        truthy; otherwise a pipeline created with no explicit model or
        tokenizer.
    """
    # Only override the pipeline's own choices when both pieces are supplied.
    overrides = {'model': model, 'tokenizer': tokenizer} if model and tokenizer else {}
    return pipeline('text-classification', **overrides)

# Page header: app title plus a link back to the hosted Space.
st.title('Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)')
st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')

abstracts = load_abstracts()
if not abstracts:
    # With no abstracts there is nothing to select or classify; stop the
    # script run here instead of failing further down.
    st.warning('No abstracts could be loaded from the HUPD dataset.')
    st.stop()

# Preselect the first abstract. (The earlier `abstracts.index(abstracts[0])`
# always evaluated to 0 and raised IndexError on an empty list.)
dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=0)
model, tokenizer = load_model(FINETUNED_OPT)

# Take in user input
#user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')

# The user can interact with a dropdown menu to choose a sentiment analysis model.
#dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
#model, tokenizer = load_model(dropdown_value)


# Classify the selected abstract. truncation=True asks the tokenizer to cut
# inputs that exceed the model's maximum sequence length, so a long abstract
# does not fail inside the model.
result = sentiment_analysis(model, tokenizer)(dropdown_abstracts, truncation=True)

# Display the sentiment analysis results
st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])