NLP / app.py
ashishraics's picture
added example
4cd85c3
raw
history blame
16.2 kB
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_text_rating.st_text_rater import st_text_rater
from transformers import AutoTokenizer,AutoModelForSequenceClassification
from transformers import AutoModelForMaskedLM
import onnxruntime as ort
import os
import time
import plotly.express as px
import plotly.graph_objects as go
from PIL import Image
global _plotly_config
_plotly_config={'displayModeBar': False}
from sentiment_clf_helper import (classify_sentiment,
create_onnx_model_sentiment,
classify_sentiment_onnx)
from zeroshot_clf_helper import (zero_shot_classification,
create_onnx_model_zs_nli,
create_onnx_model_zs_mlm,
zero_shot_classification_nli_onnx,
zero_shot_classification_fillmask_onnx)
import multiprocessing
# Host CPU core count; displayed in the README branch. ORT sessions are still
# pinned to 1 thread (see session_options_ort) since only 1 core/user is free.
total_threads=multiprocessing.cpu_count()#for ort inference
import yaml
def read_yaml(file_path):
    """Parse the YAML file at *file_path* and return its deserialized content.

    Uses ``yaml.safe_load`` (no arbitrary Python object construction) and an
    explicit UTF-8 encoding so parsing does not depend on the locale default.

    Args:
        file_path: Path to a YAML file (here: the app's ``config.yaml``).

    Returns:
        The deserialized document — typically a dict for config files.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
# All model checkpoints, save directories and ONNX file names come from one
# YAML file, read a single time at startup.
config = read_yaml('config.yaml')

# --- sentiment classifier settings ---
_sent_cfg = config['SENTIMENT_CLF']
sent_chkpt = _sent_cfg['sent_chkpt']
sent_mdl_dir = _sent_cfg['sent_mdl_dir']
sent_onnx_mdl_dir = _sent_cfg['sent_onnx_mdl_dir']
sent_onnx_mdl_name = _sent_cfg['sent_onnx_mdl_name']
sent_onnx_quant_mdl_name = _sent_cfg['sent_onnx_quant_mdl_name']

# --- zero-shot NLI classifier settings ---
_zs_cfg = config['ZEROSHOT_CLF']
zs_chkpt = _zs_cfg['zs_chkpt']
zs_mdl_dir = _zs_cfg['zs_mdl_dir']
zs_onnx_mdl_dir = _zs_cfg['zs_onnx_mdl_dir']
zs_onnx_mdl_name = _zs_cfg['zs_onnx_mdl_name']
zs_onnx_quant_mdl_name = _zs_cfg['zs_onnx_quant_mdl_name']

# --- zero-shot fill-mask (MLM) settings ---
_zs_mlm_cfg = config['ZEROSHOT_MLM']
zs_mlm_chkpt = _zs_mlm_cfg['zs_mlm_chkpt']
zs_mlm_mdl_dir = _zs_mlm_cfg['zs_mlm_mdl_dir']
zs_mlm_onnx_mdl_dir = _zs_mlm_cfg['zs_mlm_onnx_mdl_dir']
zs_mlm_onnx_mdl_name = _zs_mlm_cfg['zs_mlm_onnx_mdl_name']
# Streamlit page setup. st.set_page_config must be the first Streamlit command
# executed in the script, before any other st.* call.
st.set_page_config(  # Alternate names: setup_page, page, layout
    layout="wide",  # Can be "centered" or "wide". In the future also "dashboard", etc.
    initial_sidebar_state="auto",  # Can be "auto", "expanded", "collapsed"
    page_title='None',  # String or None. Strings get appended with "• Streamlit".
)

# Remove the default top padding of the main content container by injecting CSS.
padding_top = 0
st.markdown(f"""
<style>
.reportview-container .main .block-container{{
padding-top: {padding_top}rem;
}}
</style>""",
    unsafe_allow_html=True,
)
def set_page_title(title):
    """Force the browser-tab title to *title* via an injected JS hack.

    Streamlit fixes the tab title at set_page_config time; this renders a
    zero-height iframe whose script reaches into the parent document,
    installs a MutationObserver on the <title> node (replacing any observer
    from a previous rerun via window.parent.titleObserver), and rewrites the
    title back to *title* whenever Streamlit changes it.

    NOTE(review): the trailing backslashes inside the f-string are Python
    line continuations that join the JS statements onto shared lines —
    presumably to sidestep markdown line handling; confirm before reformatting.
    """
    st.sidebar.markdown(unsafe_allow_html=True, body=f"""
<iframe height=0 srcdoc="<script>
const title = window.parent.document.querySelector('title') \
const oldObserver = window.parent.titleObserver
if (oldObserver) {{
oldObserver.disconnect()
}} \
const newObserver = new MutationObserver(function(mutations) {{
const target = mutations[0].target
if (target.text !== '{title}') {{
target.text = '{title}'
}}
}}) \
newObserver.observe(title, {{ childList: true }})
window.parent.titleObserver = newObserver \
title.text = '{title}'
</script>" />
""")
# Apply the custom browser-tab title (overrides the page_title set above).
set_page_title('NLP use cases')

# Hide Streamlit's default hamburger menu and footer via injected CSS.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# onnxruntime threading: these session options must be passed when each
# InferenceSession is created. Both intra- and inter-op parallelism are pinned
# to 1 thread — on this shared Space only one core is available per user, so
# over-subscription would slow inference down.
# os.environ["OMP_NUM_THREADS"] = "1" #use this before changing session options of onnx runtime
session_options_ort = ort.SessionOptions()
session_options_ort.intra_op_num_threads=1
session_options_ort.inter_op_num_threads=1
# session_options_ort.execution_mode = session_options_ort.ExecutionMode.ORT_SEQUENTIAL
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def create_model_dir(chkpt, model_dir, task_type):
    """Download a Hugging Face checkpoint and save model + tokenizer locally.

    A no-op when *model_dir* already exists, so repeated app reruns never
    re-download. Unknown *task_type* values still create the (empty)
    directory but save nothing, matching the original behavior.

    Args:
        chkpt: Hugging Face checkpoint name to download from the hub.
        model_dir: Local directory to save model and tokenizer into.
        task_type: 'classification' for a sequence-classification head,
            'mlm' for a masked-language-model head; anything else is ignored.
    """
    if os.path.exists(model_dir):
        return
    # makedirs(exist_ok=True) replaces the original bare `except: pass` around
    # os.mkdir, which silently swallowed *every* error (permissions included)
    # while still tolerating a concurrent-creation race.
    os.makedirs(model_dir, exist_ok=True)
    # Dispatch table instead of if/elif; unknown task types fall through.
    model_cls = {
        'classification': AutoModelForSequenceClassification,
        'mlm': AutoModelForMaskedLM,
    }.get(task_type)
    if model_cls is None:
        return
    _model = model_cls.from_pretrained(chkpt)
    _tokenizer = AutoTokenizer.from_pretrained(chkpt)
    _model.save_pretrained(model_dir)
    _tokenizer.save_pretrained(model_dir)
# Centered page title (HTML via markdown) and a horizontal rule.
st.markdown("<h1 style='text-align: center; color: #3366ff;'>NLP Basic Use Cases</h1>", unsafe_allow_html=True)
st.markdown("---")

# Sidebar task selector; `select_task` drives which UI branch renders below
# (README / Detect Sentiment / Zero Shot Classification).
with st.sidebar:
    st.markdown("<h1 style='text-align: left; color: ;'>NLP Tasks</h1>", unsafe_allow_html=True)
    select_task=st.selectbox(label="Select task from drop down menu",
                             options=['README',
                                      'Detect Sentiment','Zero Shot Classification'])
############### Pre-Download & instantiate objects for sentiment analysis *********************** START **********************
# #create model/token dir for sentiment classification for faster inference
# create_model_dir(chkpt=sent_chkpt, model_dir=sent_mdl_dir,task_type='classification')
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def sentiment_task_selected(task,
                            sent_chkpt=sent_chkpt,
                            sent_mdl_dir=sent_mdl_dir,
                            sent_onnx_mdl_dir=sent_onnx_mdl_dir,
                            sent_onnx_mdl_name=sent_onnx_mdl_name,
                            sent_onnx_quant_mdl_name=sent_onnx_quant_mdl_name):
    """Load (and cache) the sentiment tokenizer plus its ONNX inference session.

    `task` and the checkpoint/path defaults act mainly as st.cache keys; only
    the saved tokenizer directory and the exported ONNX file are read here —
    no full torch model is loaded, since inference goes through ONNX.
    """
    tok = AutoTokenizer.from_pretrained(sent_mdl_dir)
    # Single-threaded session via the module-level ORT options.
    onnx_path = f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}"
    session = ort.InferenceSession(onnx_path, sess_options=session_options_ort)
    return tok, session
############## Pre-Download & instantiate objects for sentiment analysis ********************* END **********************************
############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
# # create model/token dir for zeroshot clf -- already created so not required
# create_model_dir(chkpt=zs_chkpt, model_dir=zs_mdl_dir,task_type='classification')
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def zs_nli_task_selected(task,
                         zs_chkpt,
                         zs_mdl_dir,
                         zs_onnx_mdl_dir,
                         zs_onnx_mdl_name):
    """Load (and cache) the zero-shot NLI tokenizer and its ONNX session.

    Only the tokenizer is needed alongside the already-exported ONNX graph;
    the full model is never instantiated here. `task` and the paths double
    as st.cache keys.
    """
    # Session first, tokenizer second — the two loads are independent.
    nli_onnx_path = f"{zs_onnx_mdl_dir}/{zs_onnx_mdl_name}"
    nli_session = ort.InferenceSession(nli_onnx_path, sess_options=session_options_ort)
    nli_tokenizer = AutoTokenizer.from_pretrained(zs_mdl_dir)
    return nli_tokenizer, nli_session
############## Pre-Download & instantiate objects for Zero shot NLI analysis ********************* END **********************************
############### Pre-Download & instantiate objects for Zero shot clf NLI *********************** START **********************
## create model/token dir for zeroshot clf -- already created so not required
# create_model_dir(chkpt=zs_mlm_chkpt, model_dir=zs_mlm_mdl_dir, task_type='mlm')
@st.cache(allow_output_mutation=True, suppress_st_warning=True, max_entries=None, ttl=None)
def zs_mlm_task_selected(task,
                         zs_mlm_chkpt=zs_mlm_chkpt,
                         zs_mlm_mdl_dir=zs_mlm_mdl_dir,
                         zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir,
                         zs_mlm_onnx_mdl_name=zs_mlm_onnx_mdl_name):
    """Load (and cache) the fill-mask (MLM) tokenizer and its ONNX session.

    Mirrors zs_nli_task_selected: tokenizer from the saved model directory,
    inference session from the exported ONNX file, both cached by st.cache
    with `task` and the path defaults as keys.
    """
    mlm_tokenizer = AutoTokenizer.from_pretrained(zs_mlm_mdl_dir)
    mlm_onnx_path = f"{zs_mlm_onnx_mdl_dir}/{zs_mlm_onnx_mdl_name}"
    mlm_session = ort.InferenceSession(mlm_onnx_path, sess_options=session_options_ort)
    return mlm_tokenizer, mlm_session
############## Pre-Download & instantiate objects for Zero shot MLM analysis ********************* END **********************************
# Image.open('hf_space1.png').convert('RGB').save('hf_space1.png')
# Landing image, loaded eagerly at startup (shown only in the README branch).
img = Image.open("hf_space1.png")

# ---------------- "README" branch ----------------
if select_task == 'README':
    st.header("NLP Summary")
    # Fixed the garbled original copy ("Zeroshot Classification.Currently.It has").
    st.write(f"The App gives you the ability to 1) Detect Sentiment, 2) Zero Shot Classification. "
             f"Currently it has {total_threads} CPU cores but only 1 is available per user, so "
             f"inference time will be on the higher side.")
    st.markdown("---")
    st.image(img)
# ---------------- "Detect Sentiment" branch ----------------
if select_task == 'Detect Sentiment':
    # Time the (st.cache-backed) tokenizer/session load and show it to the user.
    t1=time.time()
    tokenizer_sentiment,sentiment_session = sentiment_task_selected(task=select_task)
    ##below 2 steps are slower as caching is not enabled
    # tokenizer_sentiment = AutoTokenizer.from_pretrained(sent_mdl_dir)
    # sentiment_session = ort.InferenceSession(f"{sent_onnx_mdl_dir}/{sent_onnx_mdl_name}")
    t2 = time.time()
    st.write(f"Total time to load Model is {(t2-t1)*1000:.1f} ms")

    st.subheader("You are now performing Sentiment Analysis")
    # One text box; multiple inputs are comma-separated and split again below.
    input_texts = st.text_input(label="Input texts separated by comma")
    c1,c2,_,_=st.columns(4)
    with c1:
        response1=st.button("Compute (ONNX runtime)")
    if response1:
        start = time.time()
        sentiments=classify_sentiment_onnx(input_texts,
                                           _session=sentiment_session,
                                           _tokenizer=tokenizer_sentiment)
        end = time.time()
        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
        # Render one ratable widget per input: green background for 'Positive',
        # reddish otherwise. NOTE(review): key=t would collide if the same text
        # is entered twice — confirm inputs are expected to be unique.
        for i,t in enumerate(input_texts.split(',')):
            if sentiments[i]=='Positive':
                response=st_text_rater(t + f"--> This statement is {sentiments[i]}",
                                       color_background='rgb(154,205,50)',key=t)
            else:
                response = st_text_rater(t + f"--> This statement is {sentiments[i]}",
                                         color_background='rgb(233, 116, 81)',key=t)
    else:
        pass
# ---------------- "Zero Shot Classification" branch ----------------
if select_task=='Zero Shot Classification':
    # Load the NLI tokenizer + ONNX session (cached) and report the load time.
    t1=time.time()
    tokenizer_zs,session_zs = zs_nli_task_selected(task=select_task ,
                                                   zs_chkpt=zs_chkpt,
                                                   zs_mdl_dir=zs_mdl_dir,
                                                   zs_onnx_mdl_dir=zs_onnx_mdl_dir,
                                                   zs_onnx_mdl_name=zs_onnx_mdl_name)
    t2 = time.time()
    st.write(f"Total time to load NLI Model is {(t2-t1)*1000:.1f} ms")

    # Load the fill-mask (MLM) tokenizer + ONNX session (cached) as well, so
    # either button below can run without a further load.
    t1=time.time()
    tokenizer_zs_mlm,session_zs_mlm = zs_mlm_task_selected(task=select_task,
                                                           zs_mlm_chkpt=zs_mlm_chkpt,
                                                           zs_mlm_mdl_dir=zs_mlm_mdl_dir,
                                                           zs_mlm_onnx_mdl_dir=zs_mlm_onnx_mdl_dir,
                                                           zs_mlm_onnx_mdl_name=zs_mlm_onnx_mdl_name)
    t2 = time.time()
    st.write(f"Total time to load MLM Model is {(t2-t1)*1000:.1f} ms")

    st.subheader("Zero Shot Classification using NLI & MLM")
    # Pre-filled example passage and labels so the demo works with one click.
    default_paratext=""" A molar tooth from Southeast Asia probably belonged to a member of a cryptic group of Stone Age hominids called Denisovans, researchers say. If so, this relatively large tooth joins only a handful of fossils from Denisovans, who are known from ancient DNA pegging them as close Neandertal relatives. Analyses of the tooth’s internal structure and protein makeup indicate that the molar came from a girl in the Homo genus. She died between the ages of 3½ and 8½, paleoanthropologist Fabrice Demeter of the University of Copenhagen and colleagues say."""
    default_labels="""science, politics,sports"""
    input_texts = st.text_area(label="Input text to classify into topics",
                               height=250, max_chars=1000,
                               value=default_paratext)
    # NOTE(review): "input_lables" is a typo but also the name the helpers are
    # called with below — left as-is to keep this a comments-only change.
    input_lables = st.text_input(label="Enter labels separated by commas",value=default_labels)
    input_hypothesis = st.text_input(label="Enter your hypothesis",value="This is an example of")
    # Two compute buttons: NLI-based vs fill-mask-based zero-shot classification.
    c1,c2,_,=st.columns(3)
    with c1:
        response1=st.button("Compute using NLI approach (ONNX runtime)")
    with c2:
        response2=st.button("Compute using Fill-Mask approach(ONNX runtime)")
    if response1:
        start = time.time()
        df_output = zero_shot_classification_nli_onnx(premise=input_texts,
                                                      labels=input_lables,
                                                      hypothesis=input_hypothesis,
                                                      _session=session_zs,
                                                      _tokenizer=tokenizer_zs,
                                                      )
        end = time.time()
        st.write(f"Time taken for computation {(end-start)*1000:.1f} ms")
        # Bar chart of per-label probabilities. NOTE(review): the NLI helper's
        # dataframe appears to use a lowercase 'labels' column (the MLM one uses
        # 'Labels') — confirm against the helper modules.
        fig = px.bar(x='Probability',
                     y='labels',
                     text='Probability',
                     data_frame=df_output,
                     title='Zero Shot NLI Normalized Probabilities')
        st.plotly_chart(fig, config=_plotly_config)
    elif response2:
        start=time.time()
        df_output=zero_shot_classification_fillmask_onnx(premise=input_texts,
                                                         labels=input_lables,
                                                         hypothesis=input_hypothesis,
                                                         _session=session_zs_mlm,
                                                         _tokenizer=tokenizer_zs_mlm,
                                                         )
        end=time.time()
        st.write(f"Time taken for computation {(end - start) * 1000:.1f} ms")
        st.write(f"Currently hypothesis and premise have *single token_type_ids* ."
                 f"Once updated for different *token_type_ids* expect the model performance to increase.")
        fig = px.bar(x='Probability',
                     y='Labels',
                     text='Probability',
                     data_frame=df_output,
                     title='Zero Shot MLM Normalized Probabilities')
        st.plotly_chart(fig, config=_plotly_config)
    else:
        pass