nlp-qual-space-dev

Sleeping

App Files Files Community

nlp-qual-space-dev / overview.py

maxspad

linearized the gauge

8e11190 about 2 years ago

raw

history blame contribute delete

6.67 kB

	from matplotlib.cm import get_cmap
	import plotly.graph_objects as go
	import hydralit_components as hc

	# Markdown text rendered inside the "About the QuAL Score and this Tool"
	# expander by NQDOverview.draw().
	about_blurb = '''
	### About the QuAL Score

	The Quality of Assessment for Learning score (QuAL score),
	was created to evaluate short qualitative comments that are related to specific
	scores entered into a workplace-based assessment,
	common within the competency-based medical education (CBME) context.

	It is rated on a scale of 0-5, with 0 signifying very low quality and 5 very high quality.
	It consists of three subscores which are summed to calculate the overall QuAL score:

	1. Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)
	2. Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)
	3. Connection - Is the rater's suggestion linked to the behavior described? (0-no/1-yes)

	The QuAL score has validity evidence for accurately measuring the quality of evaluation comments in CBME.

	For more information, see the paper [here](https://doi.org/10.1080/10401334.2019.1708365).

	### About this Tool

	The QuAL score accurately rates the quality of narrative comments in CBME, but
	it still requires time-consuming manual rating. With large volumes of text generated in a
	typical CBME program, large-scale assessment of comment quality is impractical.
	This tool uses machine learning (ML) and natural language processing (NLP) to automate
	the rating of the QuAL score on narrative comments.

	We trained a machine learning model to predict each of the three subscores described above.
	The resulting models are accurate:
	1. Evidence - Balanced accuracy of 61.5% for a 0-3 result, within-one accuracy of 96.4%
	2. Suggestion - Accuracy of 85%, sensitivity for lack of suggestion 86.2%
	3. Connection - Accuracy of 82%, sensitivity for lack of connection 90%

	The models are highly accurate, but not perfect! You may experience times where
	the results are not consistent with your interpretation of the text. If you do, please
	leave us [feedback](https://forms.gle/PfXxcGmvLYvd9jWz5). This tool is intended as a demonstration only
	and should not be used for high-stakes assessment (yet!).
	'''
	class NQDOverview(object):
	    """Overview panel for one set of QuAL predictions.

	    ``draw`` renders, onto ``parent``, an "about" expander, a bullet gauge
	    of the overall QuAL label and one metric per sub-score.

	    Args:
	        parent: Object exposing ``expander``, ``markdown``, ``plotly_chart``,
	            ``columns`` and ``metric`` (presumably the ``streamlit`` module
	            or a Streamlit container -- confirm against the caller).
	        results: Mapping read as ``results[name]['label']`` for the names
	            ``'qual'``, ``'q1'``, ``'q2i'`` and ``'q3i'``;
	            ``get_prog_setup`` additionally reads ``results[name]['scores']``.
	        dial_cmap: Matplotlib colormap name; resolved with ``get_cmap`` and
	            stored on ``self.cmap``.
	    """

	    # Hex colour for each overall QuAL label.
	    _QUAL_COLORS = {
	        0: '#ffffff',
	        1: '#dc3545',
	        2: '#f60',
	        3: '#ffc107',
	        4: '#6ea728',
	        5: '#28a745',
	    }

	    # Metric text for each evidence (q1) label.
	    _EVIDENCE_TEXT = {
	        0: '😥 None',
	        1: '😐 Low',
	        2: '😊 Medium',
	        3: '😁 High',
	    }

	    def __init__(self, parent, results, dial_cmap='RdYlGn'):
	        self.p = parent
	        self.results = results
	        self.cmap = get_cmap(dial_cmap)

	    @staticmethod
	    def _match_label(table, label, what):
	        """Return the ``table`` value whose key equals ``label``.

	        Keys are compared with ``==`` rather than hashed, so any label
	        object that compares equal to the integer key is accepted.

	        Raises:
	            ValueError: If no key equals ``label``.
	        """
	        for key, value in table.items():
	            if label == key:
	                return value
	        raise ValueError(
	            'Unexpected {} label: {!r}'.format(what, label))

	    def _get_color(self):
	        """Return the hex colour string for the overall QuAL label.

	        Raises:
	            ValueError: If the label is not one of 0-5.
	        """
	        return self._match_label(
	            self._QUAL_COLORS, self.results['qual']['label'], 'qual')

	    def _build_figure(self):
	        """Build the Plotly bullet gauge showing the overall QuAL label."""
	        # Each step is one unit wide and centred on an integer label, so the
	        # axis runs from -0.5 to 5.5.
	        steps = [
	            {'range': [-0.5, 0.5], 'color': "maroon"},
	            {'range': [0.5, 1.5], 'color': 'indianred'},
	            {'range': [1.5, 2.5], 'color': "orange"},
	            {'range': [2.5, 3.5], 'color': 'gold'},
	            {'range': [3.5, 4.5], 'color': 'lightgreen'},
	            {'range': [4.5, 5.5], 'color': 'green'},
	        ]
	        indicator = go.Indicator(
	            mode="number+gauge",
	            value=self.results['qual']['label'],
	            domain={'x': [0.1, 1], 'y': [0, 1]},
	            title={'text': "<b>QuAL:</b>"},
	            gauge={
	                'shape': "bullet",
	                'axis': {'range': [-0.5, 5.5]},
	                'steps': steps,
	                'bar': {
	                    'color': 'rgba(123, 123, 123, 0.85)',
	                    'thickness': 0.7,
	                },
	            },
	        )
	        fig = go.Figure(indicator)
	        # A plain dict is used for the margin: the top-level ``go.Margin``
	        # class is a deprecated alias of ``go.layout.Margin``.
	        fig.update_layout(margin=dict(t=25, b=20), height=125)
	        return fig

	    def draw(self):
	        """Render the about text, the gauge and the three sub-score metrics.

	        Raises:
	            ValueError: If the q1 label is not one of 0-3.
	        """
	        st = self.p

	        with st.expander('About the QuAL Score and this Tool', expanded=False):
	            st.markdown(about_blurb)

	        st.plotly_chart(self._build_figure(), use_container_width=True)

	        # Per-metric confidence bars are not rendered at present;
	        # get_prog_setup() provides the score/theme pair for them.
	        cols = st.columns(3)

	        with cols[0]:
	            md_str = self._match_label(
	                self._EVIDENCE_TEXT, self.results['q1']['label'], 'q1')
	            st.metric('Level of Detail', md_str,
	                      help='Q1 - Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)')

	        # NOTE(review): for 'q2i' and 'q3i' a label of 0 is displayed as
	        # "Yes" -- presumably these models predict the inverted class
	        # (0 = present); confirm against the model definitions.
	        with cols[1]:
	            md_str = '✅ Yes' if self.results['q2i']['label'] == 0 else '❌ No'
	            st.metric('Suggestion Given', md_str,
	                      help='Q2 - Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)')

	        with cols[2]:
	            md_str = '✅ Yes' if self.results['q3i']['label'] == 0 else '❌ No'
	            st.metric('Suggestion Linked', md_str,
	                      help='Q3 - Connection - Is the rater’s suggestion linked to the behavior described? (0-no/1-yes)')

	    def get_prog_setup(self, q):
	        """Return ``(prog_score, prog_theme)`` for the sub-model named ``q``.

	        ``prog_score`` is the entry of ``results[q]['scores']`` indexed by
	        the predicted label, multiplied by 100 (presumably a probability
	        expressed as a percentage -- confirm). ``prog_theme`` is a fixed
	        theme dict; it does not vary with the score.
	        """
	        entry = self.results[q]
	        prog_score = entry['scores'][entry['label']] * 100
	        prog_theme = {'content_color': 'white', 'progress_color': '#aaa'}
	        return prog_score, prog_theme