from matplotlib.cm import get_cmap
import plotly.graph_objects as go
import hydralit_components as hc

about_blurb = '''
### About the QuAL Score

The Quality of Assessment for Learning score (QuAL score) was created to evaluate 
short qualitative comments tied to specific scores entered into a workplace-based 
assessment, a format common in the competency-based medical education (CBME) context.

It is rated on a scale of 0-5, with 0 signifying very low quality and 5 very high quality.
It consists of three subscores which are summed to calculate the overall QuAL score:

1. Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)
2. Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)
3. Connection - Is the rater's suggestion linked to the behavior described? (0-no/1-yes)

The QuAL score has validity evidence for accurately measuring the quality of evaluation comments in CBME.

For more information, see the paper [here](https://doi.org/10.1080/10401334.2019.1708365).

### About this Tool

The QuAL score accurately rates the quality of narrative comments in CBME, but 
it still requires time-consuming manual rating. With the large volumes of text generated 
in a typical CBME program, manual assessment of comment quality at scale is impractical. 
This tool uses machine learning (ML) and natural language processing (NLP) to automate 
QuAL score ratings of narrative comments.

We trained a machine learning model to predict each of the three subscores described above.
The resulting models are accurate:
1. Evidence - Balanced accuracy of 61.5% for a 0-3 result, within-one accuracy of 96.4%
2. Suggestion - Accuracy of 85%, sensitivity for lack of suggestion 86.2%
3. Connection - Accuracy of 82%, sensitivity for lack of connection 90%

The models are highly accurate, but not perfect! You may encounter cases where
the results are not consistent with your interpretation of the text. If you do, please
leave us [feedback](https://forms.gle/PfXxcGmvLYvd9jWz5). This tool is intended as a demonstration only 
and should not be used for high-stakes assessment (yet!).
'''


class NQDOverview(object):
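    """Streamlit overview panel for one set of QuAL-score predictions.

    `parent` is the Streamlit module (or container) used for rendering, and
    `results` is a dict with keys 'qual', 'q1', 'q2i', and 'q3i', each mapping to
    {'label': <predicted class>, 'scores': <per-class confidence scores>}.
    """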
    def __init__(self, parent, results, dial_cmap='RdYlGn'):
        self.p = parent
        self.results = results
        self.cmap = get_cmap(dial_cmap)

    def _get_color(self):
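        """Map the predicted overall QuAL label (0-5) to a hex color for display."""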
        lab = self.results['qual']['label']
        if lab == 0:
            color = '#ffffff'
        elif lab == 1:
            color = '#dc3545'
        elif lab == 2:
            color = '#f60'
        elif lab == 3:
            color = '#ffc107'
        elif lab == 4:
            color = '#6ea728'
        elif lab == 5:
            color = '#28a745'
        # color = self.cmap(self.results['qual']['label'] / 6.0)
        # color = f'rgba({int(color[0]*256)}, {int(color[1]*256)}, {int(color[2]*256)}, {int(color[3]*256)})'
        return color 

    def _build_figure(self):
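        """Build a plotly bullet-gauge indicator showing the overall QuAL score (0-5)."""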
        fig = go.Figure(go.Indicator(
            mode = "number+gauge", value = self.results['qual']['label'],
            domain = {'x': [0.1, 1], 'y': [0, 1]},
            title = {'text' :"<b>QuAL:</b>"},
            gauge = {
                'shape': "bullet",
                'axis': {'range': [-0.5, 5.5]},
                'steps': [
                    {'range': [-0.5, 0.5], 'color': "maroon"},
                    {'range': [0.5, 1.5], 'color': 'indianred'},
                    {'range': [1.5, 2.5], 'color': "orange"},
                    {"range": [2.5, 3.5], 'color': 'gold'},
                    {'range': [3.5,4.5], 'color': 'lightgreen'},
                    {'range': [4.5,5.5], 'color': 'green'}
                ],
                'bar': {
                    'color': 'rgba(123, 123, 123, 0.85)',
                    'thickness': 0.7
                }}))
        fig.update_layout(margin=dict(t=25, b=20), height=125)
        return fig

    def draw(self):    
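        """Render the about expander, the QuAL gauge, and the three subscore metrics."""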
        st = self.p

        with st.expander('About the QuAL Score and this Tool', expanded=False):
            st.markdown(about_blurb)

        fig = self._build_figure()
        st.plotly_chart(fig, use_container_width=True)

        cols = st.columns(3)
        with cols[0]:
            q1lab = self.results['q1']['label']
            if q1lab == 0:
                md_str = '😥 None'
            elif q1lab == 1:
                md_str = '😐 Low'
            elif q1lab == 2:
                md_str = '😊 Medium'
            elif q1lab == 3:
                md_str = '😁 High'
                # prog_score, prog_theme = self.get_prog_setup('q1')
                # hc.info_card(title='Level of Detail', content=md_str, sentiment='good', bar_value=prog_score)
            st.metric('Level of Detail', md_str, 
                    help='Q1 - Evidence - Does the rater provide sufficient evidence about resident performance? (0-no comment at all, 1-no, but comment present, 2-somewhat, 3-yes/full description)')
            prog_score, prog_theme = self.get_prog_setup('q1')
            # hc.progress_bar(prog_score, f'{prog_score:.2f}% confident', override_theme=prog_theme)

        with cols[1]:
            q2lab = self.results['q2i']['label']
            if q2lab == 0:
                md_str = '✅ Yes'
            else:
                md_str = '❌ No'
            st.metric('Suggestion Given', md_str,
                help='Q2 - Suggestion - Does the rater provide a suggestion for improvement? (0-no/1-yes)')
            prog_score, prog_theme = self.get_prog_setup('q2i')
            # hc.progress_bar(prog_score, f'{prog_score:.2f}% confident', override_theme=prog_theme)

        with cols[2]:
            q3lab = self.results['q3i']['label']
            if q3lab == 0:
                md_str = '✅ Yes'
            else:
                md_str = '❌ No'
            st.metric('Suggestion Linked', md_str,
                help="Q3 - Connection - Is the rater's suggestion linked to the behavior described? (0-no/1-yes)")
            prog_score, prog_theme = self.get_prog_setup('q3i')
            # hc.progress_bar(prog_score, f'{prog_score:.2f}% confident', override_theme=prog_theme)


    def get_prog_setup(self, q):
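        """Return the model's confidence (in percent) in the predicted label for
        subscore `q`, plus a theme dict for the (currently commented-out) hydralit
        progress bar.
        """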
        prog_score = self.results[q]['scores'][self.results[q]['label']] * 100
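        # NOTE: prog_sent maps confidence to a traffic-light color but is not yet
        # applied to prog_theme, which is fixed below.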
        if prog_score > 75:
            prog_sent = '#28a745'
        elif (prog_score > 25) and (prog_score <= 75):
            prog_sent = '#ffc107'
        else:
            prog_sent = '#dc3545'
        prog_theme = {'content_color': 'white', 'progress_color': '#aaa'}
        return prog_score, prog_theme
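
# Minimal usage sketch (illustrative only: the `results` values below are made up,
# not real model output; in the app this dict comes from the QuAL prediction pipeline):
#
#   import streamlit as st
#
#   results = {
#       'qual': {'label': 4, 'scores': [0.01, 0.02, 0.05, 0.12, 0.60, 0.20]},
#       'q1':   {'label': 2, 'scores': [0.05, 0.15, 0.60, 0.20]},
#       'q2i':  {'label': 0, 'scores': [0.80, 0.20]},
#       'q3i':  {'label': 0, 'scores': [0.75, 0.25]},
#   }
#
#   NQDOverview(st, results).draw()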