Spaces:

saccharinedreams
/

sentiment-analysis-app

Runtime error

App Files Files Community

sentiment-analysis-app / app.py

benliang99

Added documentation, website details to readme.

f058f94 over 1 year ago

raw

history blame

4.27 kB

	import streamlit as st
	from datasets import load_dataset
	from transformers import pipeline, DistilBertForSequenceClassification, DistilBertTokenizerFast, AutoModelForSequenceClassification, AutoTokenizer, TFAutoModelForSequenceClassification

	# Names of the selectable sentiment-analysis models (transformers library).
	MODEL_OPTS = [
	    'finetuned',
	    'default',
	    'bertweet-base-sentiment-analysis',
	    'twitter-roberta-base',
	    'distilRoberta-financial-sentiment',
	]
	# The first two entries double as named shortcuts for the fine-tuned and
	# default choices.
	FINETUNED_OPT, DEFAULT_OPT = MODEL_OPTS[:2]

	# Label-to-index mapping for the HUPD 'decision' field. The helper below
	# referenced this name, but it was never defined in this file, so every
	# call raised NameError.
	# NOTE(review): values follow the mapping published on the HUPD dataset
	# card -- confirm they match the labels used when fine-tuning the model.
	decision_to_str = {
	    'REJECTED': 0,
	    'ACCEPTED': 1,
	    'PENDING': 2,
	    'CONT-REJECTED': 3,
	    'CONT-ACCEPTED': 4,
	    'CONT-PENDING': 5,
	}


	# Helper function
	def map_decision_to_string(example):
	    """Replace an example's decision string with its integer label.

	    Args:
	        example: Mapping with a 'decision' key whose value is one of the
	            keys of ``decision_to_str``.

	    Returns:
	        A one-item dict ``{'decision': <integer label>}``.

	    Raises:
	        KeyError: If 'decision' is missing from ``example`` or its value is
	            not a key of ``decision_to_str``.
	    """
	    return {'decision': decision_to_str[example['decision']]}

	def load_abstracts():
	    """Load the HUPD sample data and return its training abstracts.

	    Calls ``load_dataset('HUPD/hupd', ...)`` with the 'sample' configuration,
	    a training filing window of 2016-01-01 through 2016-01-31 and a
	    validation filing window of 2016-01-01 through 2016-01-01.

	    Returns:
	        The 'abstract' column of the 'train' split.
	    """
	    hupd_options = {
	        'name': 'sample',
	        'data_files': "https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
	        'icpr_label': None,
	        'train_filing_start_date': '2016-01-01',
	        'train_filing_end_date': '2016-01-31',
	        'val_filing_start_date': '2016-01-01',
	        'val_filing_end_date': '2016-01-01',
	    }
	    splits = load_dataset('HUPD/hupd', **hupd_options)
	    # Only the abstracts are handed back; no other reference to the loaded
	    # splits outlives this call.
	    return splits['train']['abstract']

	# returns loaded model and tokenizer, if any
	def load_model(opt):
	    """Load the model and tokenizer for the chosen sentiment-analysis option.

	    Args:
	        opt: One of the names in ``MODEL_OPTS``. The spelling
	            'twitter-roberta-base-sentiment' is accepted as well, because
	            earlier versions of this function matched on it instead of the
	            'twitter-roberta-base' entry listed in ``MODEL_OPTS``.

	    Returns:
	        A ``(model, tokenizer)`` tuple. Both are ``None`` for ``DEFAULT_OPT``
	        (the caller then uses the pipeline's own default model) and for an
	        unrecognised option.
	    """
	    twitter_roberta = "cardiffnlp/twitter-roberta-base-sentiment"
	    bertweet = "finiteautomata/bertweet-base-sentiment-analysis"
	    financial = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

	    # option name -> (tokenizer class, tokenizer checkpoint,
	    #                 model class, model checkpoint)
	    checkpoints = {
	        FINETUNED_OPT: (
	            DistilBertTokenizerFast, 'distilbert-base-uncased',
	            DistilBertForSequenceClassification,
	            'saccharinedreams/finetuned-distilbert-base-uncased-for-hupd',
	        ),
	        'bertweet-base-sentiment-analysis': (
	            AutoTokenizer, bertweet,
	            AutoModelForSequenceClassification, bertweet,
	        ),
	        # Name as listed in MODEL_OPTS; previously no branch matched it, so
	        # selecting it silently returned (None, None).
	        'twitter-roberta-base': (
	            AutoTokenizer, twitter_roberta,
	            AutoModelForSequenceClassification, twitter_roberta,
	        ),
	        # Legacy spelling, kept so existing callers continue to work.
	        'twitter-roberta-base-sentiment': (
	            AutoTokenizer, twitter_roberta,
	            AutoModelForSequenceClassification, twitter_roberta,
	        ),
	        'distilRoberta-financial-sentiment': (
	            AutoTokenizer, financial,
	            AutoModelForSequenceClassification, financial,
	        ),
	    }

	    model, tokenizer = None, None

	    # The default option deliberately loads nothing.
	    if opt == DEFAULT_OPT:
	        return model, tokenizer

	    if opt not in MODEL_OPTS and opt not in checkpoints:
	        print("Incorrect model selection. Try again!")

	    # Load the chosen sentiment analysis model from transformers
	    spec = checkpoints.get(opt)
	    if spec is not None:
	        tokenizer_cls, tokenizer_name, model_cls, model_name = spec
	        tokenizer = tokenizer_cls.from_pretrained(tokenizer_name)
	        model = model_cls.from_pretrained(model_name)

	    # Checked after loading (not as part of the option chain) and with
	    # `is None`, so the test does not depend on the objects' truthiness.
	    if model is None or tokenizer is None:
	        print("Model not loaded correctly. Try again!")

	    return model, tokenizer

	def sentiment_analysis(model, tokenizer):
	    """Build a 'text-classification' pipeline.

	    Args:
	        model: Model to pass to the pipeline, or a falsy value.
	        tokenizer: Tokenizer to pass to the pipeline, or a falsy value.

	    Returns:
	        ``pipeline('text-classification', model=..., tokenizer=...)`` when
	        both arguments are truthy; otherwise ``pipeline('text-classification')``
	        with no explicit model or tokenizer.
	    """
	    # Fall back to the bare pipeline unless both pieces were supplied.
	    if not (model and tokenizer):
	        return pipeline('text-classification')
	    return pipeline('text-classification', model=model, tokenizer=tokenizer)

	# Page header: title, links to the hosted app and the model repo, and a
	# one-line description of the model.
	st.title('Finetuned Sentiment Analysis for US Patents')
	st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
	# The markdown link previously lacked its closing ')', so it did not render
	# as a link.
	st.markdown('Link to the model - [model repo](https://huggingface.co/saccharinedreams/finetuned-distilbert-base-uncased-for-hupd)')
	st.markdown('This model was finetuned on the Harvard USPTO Patent Dataset and uses Distilbert-Base-Uncased.')

	abstracts = load_abstracts()
	print(len(abstracts))
	if len(abstracts) == 0:
	    # With nothing loaded, abstracts[0] below would raise IndexError and
	    # there would be no text to classify; report it and end this run.
	    st.error('No abstracts could be loaded from the HUPD dataset.')
	    st.stop()
	print(abstracts[0])
	# index=0 preselects the first abstract (the former
	# abstracts.index(abstracts[0]) lookup always evaluated to 0).
	dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=0)
	model, tokenizer = load_model(FINETUNED_OPT)

	# Alternative UI, currently disabled: free-text input and a model dropdown.
	# Take in user input
	#user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')

	# The user can interact with a dropdown menu to choose a sentiment analysis model.
	#dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
	#model, tokenizer = load_model(dropdown_value)


	# Perform sentiment analysis on the selected abstract
	classifier = sentiment_analysis(model, tokenizer)
	# truncation=True asks the tokenizer to cut over-long abstracts down to the
	# model's maximum input length instead of failing on them.
	result = classifier(dropdown_abstracts, truncation=True)

	# Display the sentiment analysis results
	st.markdown('Labels 0, 1: Not accepted, Accepted')
	st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])