Spaces:

ashishraics
/

NLP

Runtime error

App Files Files Community

NLP / app.py

ashishraics

optimize app

8bb7965 about 2 years ago

raw history blame

No virus

9.55 kB

	import numpy as np
	import pandas as pd
	import streamlit as st
	from streamlit_text_rating.st_text_rater import st_text_rater
	from transformers import AutoTokenizer,AutoModelForSequenceClassification
	import onnxruntime as ort
	import os
	import time
	import plotly.express as px
	import plotly.graph_objects as go
	# Plotly config handed to st.plotly_chart further down; `displayModeBar: False`
	# switches off Plotly's mode bar on the rendered figure.
	# (The former module-level `global` statement was a no-op and has been dropped.)
	_plotly_config = {'displayModeBar': False}

	from sentiment_clf_helper import classify_sentiment,create_onnx_model_sentiment,classify_sentiment_onnx
	from zeroshot_clf_helper import zero_shot_classification,create_onnx_model_zs,zero_shot_classification_onnx

	import yaml
	def read_yaml(file_path):
	    """Parse the YAML file at ``file_path`` and return the loaded object.

	    The file is opened explicitly as UTF-8 so that parsing does not depend on
	    the locale's default encoding. ``yaml.safe_load`` is used, so only plain
	    YAML types are constructed.

	    Raises whatever ``open`` or ``yaml.safe_load`` raise (missing file,
	    malformed YAML).
	    """
	    with open(file_path, "r", encoding="utf-8") as f:
	        return yaml.safe_load(f)

	# Parse the app configuration once at import time; every checkpoint name and
	# model path used below is read from it.
	config = read_yaml('config.yaml')

	# Settings for the sentiment classifier (section SENTIMENT_CLF).
	(
	    sent_chkpt,
	    sent_mdl_dir,
	    sent_onnx_mdl_dir,
	    sent_onnx_mdl_name,
	    sent_onnx_quant_mdl_name,
	) = (
	    config['SENTIMENT_CLF'][key]
	    for key in (
	        'sent_chkpt',
	        'sent_mdl_dir',
	        'sent_onnx_mdl_dir',
	        'sent_onnx_mdl_name',
	        'sent_onnx_quant_mdl_name',
	    )
	)

	# Settings for the zero-shot classifier (section ZEROSHOT_CLF).
	(
	    zs_chkpt,
	    zs_mdl_dir,
	    zs_onnx_mdl_dir,
	    zs_onnx_mdl_name,
	    zs_onnx_quant_mdl_name,
	) = (
	    config['ZEROSHOT_CLF'][key]
	    for key in (
	        'zs_chkpt',
	        'zs_mdl_dir',
	        'zs_onnx_mdl_dir',
	        'zs_onnx_mdl_name',
	        'zs_onnx_quant_mdl_name',
	    )
	)


	# Page-level Streamlit settings.
	st.set_page_config(
	    layout="wide",  # "centered" or "wide"
	    initial_sidebar_state="auto",  # "auto", "expanded" or "collapsed"
	    # This used to be the quoted string 'None', which is a real title ("None"),
	    # not the None default. Use the same title that set_page_title() applies
	    # later so the tab is labelled correctly from the first render.
	    page_title='NLP use cases',
	)


	# Inject CSS that sets the top padding of the main block container.
	padding_top = 0
	st.markdown(f"""
	<style>
	.reportview-container .main .block-container{{
	padding-top: {padding_top}rem;
	}}
	</style>""",
	    unsafe_allow_html=True,
	)

	def set_page_title(title):
	    """Force the browser-tab title of the parent page to ``title``.

	    Writes a zero-height iframe into the sidebar via markdown. Its inline
	    script looks up the parent document's <title> element, disconnects any
	    observer previously stored on ``window.parent.titleObserver``, installs a
	    new MutationObserver that puts ``title`` back whenever the element's text
	    differs, and finally sets the text once.

	    ``title`` is interpolated into the script unescaped, so pass trusted text
	    only.
	    """
	    title_script = f"""
	<iframe height=0 srcdoc="<script>
	const title = window.parent.document.querySelector('title') \

	const oldObserver = window.parent.titleObserver
	if (oldObserver) {{
	oldObserver.disconnect()
	}} \

	const newObserver = new MutationObserver(function(mutations) {{
	const target = mutations[0].target
	if (target.text !== '{title}') {{
	target.text = '{title}'
	}}
	}}) \

	newObserver.observe(title, {{ childList: true }})
	window.parent.titleObserver = newObserver \

	title.text = '{title}'
	</script>" />
	"""
	    st.sidebar.markdown(body=title_script, unsafe_allow_html=True)


	# Apply the browser-tab title through the iframe/script helper.
	set_page_title('NLP use cases')

	# CSS that hides the #MainMenu element and the page footer.
	hide_streamlit_style = """
	<style>
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	</style>
	"""
	st.markdown(body=hide_streamlit_style, unsafe_allow_html=True)


	# NOTE(review): st.cache is deprecated in newer Streamlit releases — confirm the
	# pinned Streamlit version before upgrading.
	@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
	def create_model_dir(chkpt, model_dir):
	    """Download checkpoint ``chkpt`` and save its model and tokenizer to ``model_dir``.

	    Nothing happens when ``model_dir`` already exists; the check is on the
	    path only, not on its contents. Otherwise the directory is created, the
	    sequence-classification model and tokenizer are loaded with
	    ``from_pretrained(chkpt)``, and both are written with ``save_pretrained``.

	    Args:
	        chkpt: checkpoint identifier passed to ``from_pretrained``.
	        model_dir: directory the model and tokenizer files are saved into.

	    Returns:
	        None.

	    Raises:
	        OSError: if the directory cannot be created. This now surfaces before
	            the download instead of being silently swallowed.
	    """
	    if os.path.exists(model_dir):
	        return

	    # exist_ok avoids failing if the directory appears between the check above
	    # and this call; makedirs also creates missing parent directories.
	    os.makedirs(model_dir, exist_ok=True)
	    _model = AutoModelForSequenceClassification.from_pretrained(chkpt)
	    _tokenizer = AutoTokenizer.from_pretrained(chkpt)
	    _model.save_pretrained(model_dir)
	    _tokenizer.save_pretrained(model_dir)


	# Centered page heading followed by a horizontal rule.
	st.markdown(
	    "<h1 style='text-align: center; color: #3366ff;'>NLP Basic Use Cases</h1>",
	    unsafe_allow_html=True,
	)
	st.markdown("---")

	# Sidebar: heading plus the task selector that drives the branches below.
	st.sidebar.markdown(
	    "<h1 style='text-align: left; color: ;'>NLP Tasks</h1>",
	    unsafe_allow_html=True,
	)
	select_task = st.sidebar.selectbox(
	    label="Select task from drop down menu",
	    options=['README', 'Detect Sentiment', 'Zero Shot Classification'],
	)

	############### Pre-Download & instantiate objects for sentiment analysis ********************* START ********************

	# Save the sentiment model and tokenizer under sent_mdl_dir so the tokenizer can
	# later be loaded from disk; create_model_dir does nothing if the directory exists.
	create_model_dir(chkpt=sent_chkpt, model_dir=sent_mdl_dir)


	@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
	def sentiment_task_selected(task,
	                            sent_chkpt=sent_chkpt,
	                            sent_mdl_dir=sent_mdl_dir,
	                            sent_onnx_mdl_dir=sent_onnx_mdl_dir,
	                            sent_onnx_mdl_name=sent_onnx_mdl_name,
	                            sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
	    """Build the objects needed for ONNX sentiment inference.

	    Loads the tokenizer saved under ``sent_mdl_dir`` and opens an ONNX Runtime
	    session on ``{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}``. No ONNX export is
	    performed here, so that model file must already exist.

	    ``task``, ``sent_chkpt`` and ``sent_onnx_quant_mdl_name`` are accepted but
	    not read by the body.

	    Returns:
	        A ``(tokenizer, onnx_session)`` tuple.
	    """
	    onnx_model_path = f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}"

	    # Only the tokenizer is needed from the saved checkpoint; the model itself
	    # is served through the ONNX session.
	    tokenizer = AutoTokenizer.from_pretrained(sent_mdl_dir)
	    session = ort.InferenceSession(onnx_model_path)
	    return tokenizer, session

	############## Pre-Download & instantiate objects for sentiment analysis ******************* END ********************************


	############### Pre-Download & instantiate objects for Zero shot clf ********************* START ********************

	# Save the zero-shot model and tokenizer under zs_mdl_dir; create_model_dir does
	# nothing if that directory already exists.
	create_model_dir(chkpt=zs_chkpt, model_dir=zs_mdl_dir)

	@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
	def zs_task_selected(task,
	                     zs_chkpt=zs_chkpt,
	                     zs_mdl_dir=zs_mdl_dir,
	                     zs_onnx_mdl_dir=zs_onnx_mdl_dir,
	                     zs_onnx_mdl_name=zs_onnx_mdl_name,
	                     zs_onnx_quant_mdl_name=zs_onnx_quant_mdl_name):
	    """Build the objects needed for ONNX zero-shot inference.

	    Loads the tokenizer saved under ``zs_mdl_dir`` and opens an ONNX Runtime
	    session on ``{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}``. No ONNX export is
	    performed here, so that model file must already exist.

	    ``task``, ``zs_chkpt`` and ``zs_onnx_quant_mdl_name`` are accepted but not
	    read by the body.

	    Returns:
	        A ``(tokenizer, onnx_session)`` tuple.
	    """
	    onnx_model_path = f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}"

	    # Only the tokenizer is needed from the saved checkpoint; inference goes
	    # through the ONNX session.
	    tokenizer = AutoTokenizer.from_pretrained(zs_mdl_dir)
	    session = ort.InferenceSession(onnx_model_path)
	    return tokenizer, session

	############## Pre-Download & instantiate objects for Zero shot analysis ******************* END ********************************

	if select_task == 'README':
	    # Landing view: only a header is rendered for this task.
	    st.header("NLP Summary")

	if select_task == 'Detect Sentiment':
	    # Report how long it takes to obtain the tokenizer and the ONNX session.
	    t1 = time.time()
	    tokenizer_sentiment, sentiment_session = sentiment_task_selected(task=select_task)
	    t2 = time.time()
	    st.write(f"Total time to load Model is {(t2-t1)*1000:.1f} ms")

	    st.header("You are now performing Sentiment Analysis")
	    input_texts = st.text_input(label="Input texts separated by comma")
	    c1, c2, _, _ = st.columns(4)

	    with c1:
	        response1 = st.button("Compute (ONNX runtime)")

	    if response1:
	        start = time.time()
	        sentiments = classify_sentiment_onnx(input_texts,
	                                             _session=sentiment_session,
	                                             _tokenizer=tokenizer_sentiment)
	        end = time.time()
	        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")

	        # assumes classify_sentiment_onnx returns one label per comma-separated
	        # text, in input order — TODO confirm against the helper.
	        for i, t in enumerate(input_texts.split(',')):
	            sentiment = sentiments[i]
	            # Green background for positive statements, red for anything else.
	            background = 'rgb(154,205,50)' if sentiment == 'Positive' else 'rgb(233, 116, 81)'
	            # The key includes the position: using the text alone made the key
	            # collide whenever the same statement was entered twice.
	            response = st_text_rater(t + f"--> This statement is {sentiment}",
	                                     color_background=background,
	                                     key=f"{i}:{t}")

	if select_task == 'Zero Shot Classification':
	    # Report how long it takes to obtain the tokenizer and the ONNX session.
	    t1 = time.time()
	    tokenizer_zs, zs_session = zs_task_selected(task=select_task)
	    t2 = time.time()
	    load_ms = (t2 - t1) * 1000
	    st.write(f"Total time to load Model is {load_ms:.1f} ms")

	    st.header("You are now performing Zero Shot Classification")
	    input_texts = st.text_input(label="Input text to classify into topics")
	    input_lables = st.text_input(label="Enter labels separated by commas")

	    # Only the first of four columns is used, which keeps the button narrow.
	    c1, _, _, _ = st.columns(4)
	    with c1:
	        response1 = st.button("Compute (ONNX runtime)")

	    if response1:
	        start = time.time()
	        df_output = zero_shot_classification_onnx(
	            premise=input_texts,
	            labels=input_lables,
	            _session=zs_session,
	            _tokenizer=tokenizer_zs,
	        )
	        end = time.time()
	        compute_ms = (end - start) * 1000

	        st.write("")
	        st.write(f"Time taken for computation {compute_ms:.1f} ms")

	        # Bar chart built from the 'Probability' and 'labels' columns of the
	        # helper's output frame.
	        fig = px.bar(
	            df_output,
	            x='Probability',
	            y='labels',
	            text='Probability',
	            title='Zero Shot Normalized Probabilities',
	        )
	        st.plotly_chart(fig, config=_plotly_config)