File size: 2,903 Bytes
228ea6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Maps each covariate's raw column name to the label it is renamed to for
# display; the labels are also offered as the "Group by" choices.
covariate_columns = {
    'content_domain': 'Content Domain',
    'language': 'Language',
    'rater_group': 'Rater Group',
}

# Columns kept as identifiers when the frame is melted to long format.
# The covariate keys are unpacked in dict order, between the rating/text
# columns and the provenance columns.
id_vars = [
    'mean_z',
    'text',
    *covariate_columns,
    'study',
    'instrument',
]

def _load_ratings():
    """Read ``data.feather`` and return it as a long-format, display-ready frame.

    Steps: keep the ``test`` and ``dev`` partitions, melt the two model
    score columns into ``x_group`` / ``x``, swap raw codes for display
    labels, and rename the columns to their display names.
    """
    # Raw value -> display label. ``DataFrame.replace`` is applied to the whole
    # frame, so any cell equal to one of these keys is relabelled.
    value_labels = {
        'en': 'English',
        'de': 'German',
        'other': 'Other',
        'personality': 'Personality',
        'laypeople': 'Laypeople',
        'students': 'Students',
        'sentiment_model': 'Sentiment Model',
        'desirability_model': 'Desirability Model'
    }
    # Covariate labels plus the two rating axes, applied in a single rename.
    column_labels = {
        **covariate_columns,
        'mean_z': 'Human-ratings',
        'x': 'Machine-ratings',
    }

    ratings = pd.read_feather(path='data.feather')
    ratings = ratings.query('partition == "test" | partition == "dev"')
    ratings = ratings.melt(
        id_vars=id_vars,
        value_vars=['sentiment_model', 'desirability_model'],
        var_name='x_group',
        value_name='x',
    )
    ratings = ratings.replace(to_replace=value_labels)
    return ratings.rename(columns=column_labels)


# Only load when the frame is not already stored in the session state.
if 'df' not in st.session_state:
    st.session_state.df = _load_ratings()

def scatter_plot(df, group_var):
    """Build faceted scatter plots of human ratings against machine ratings.

    One facet is drawn per value of ``x_group`` (two facets per row), with
    an OLS trend line fitted per trace.

    Args:
        df: Data frame providing the ``Machine-ratings``, ``Human-ratings``
            and ``x_group`` columns, plus ``text``, ``study`` and
            ``instrument`` which are shown in the hover box.
        group_var: Name of the column used to colour the points, or a falsy
            value (e.g. ``None``) for a single-colour plot.

    Returns:
        The Plotly figure created by ``px.scatter`` after layout tweaks.
    """
    # Two colours when points are grouped, otherwise a single colour.
    if group_var:
        palette = ['#36def1', '#4361ee']
    else:
        palette = ['#4361ee']

    # Hover box contents: series add extra fields, ``False`` hides a field,
    # and format strings control number formatting.
    hover_fields = {
        'Text': df.text,
        'Language': False,
        'x_group': False,
        'Human-ratings': ':.2f',
        'Machine-ratings': ':.2f',
        'Study': df.study,
        'Instrument': df.instrument,
    }

    figure = px.scatter(
        data_frame=df,
        x='Machine-ratings',
        y='Human-ratings',
        color=group_var,
        facet_col='x_group',
        facet_col_wrap=2,
        trendline='ols',
        trendline_scope='trace',
        hover_data=hover_fields,
        width=400,
        height=400,
        color_discrete_sequence=palette,
    )

    def _strip_facet_prefix(annotation):
        # Facet titles read "x_group=<value>"; keep only the part after '='.
        annotation.update(text=annotation.text.rpartition('=')[2])

    figure.for_each_annotation(_strip_facet_prefix)

    # Horizontal legend positioned below the plotting area.
    figure.update_layout(legend=dict(orientation='h', yanchor='bottom', y=-0.30))
    figure.update_xaxes(title_standoff=0)

    return figure

def show():
    """Render the "Explore the data" page.

    Writes the intro text, offers a checkbox and select box for choosing a
    covariate to colour the points by, and draws the scatter plots from the
    frame stored in ``st.session_state.df``. Nothing is plotted when that
    frame is absent from the session state.
    """
    # The frame stored in the session state keeps both the `test` and `dev`
    # partitions, so the intro text names both.
    st.markdown("""
        ## Explore the data
        Figures show the accuracy in predictions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`- and `dev`-partition data.
    """)

    show_covariates = st.checkbox('Show covariates', value=True)
    # Without covariates the plot is drawn in a single colour (group is None).
    option = (
        st.selectbox('Group by', options=list(covariate_columns.values()))
        if show_covariates
        else None
    )

    if 'df' in st.session_state:
        plot = scatter_plot(st.session_state.df, option)
        st.plotly_chart(plot, theme=None, use_container_width=True)