bjorn-hommel commited on
Commit
28183db
1 Parent(s): 06f5189
Files changed (7) hide show
  1. app.py +290 -29
  2. db.py +71 -0
  3. demo_section.py +0 -312
  4. logo-130x130.svg +35 -0
  5. modeling.py +65 -0
  6. plots.py +105 -0
  7. explore_data_section.py → utils.py +1 -57
app.py CHANGED
@@ -1,42 +1,303 @@
1
- import os
2
- import torch
3
- import dash
4
  import streamlit as st
5
  import pandas as pd
6
- import json
7
- import random
8
- import firebase_admin
9
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
10
- from transformers import pipeline
11
- from firebase_admin import credentials, firestore
12
  from dotenv import load_dotenv
13
- import plotly.graph_objects as go
14
 
15
- import demo_section
16
- import explore_data_section
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- load_dotenv()
 
 
 
19
 
20
- if 'collect_data' not in st.session_state:
21
- st.session_state.collect_data = True
22
 
23
- if 'user_id' not in st.session_state:
24
- st.session_state.user_id = random.randint(1, 9999999)
 
 
 
 
 
 
 
 
25
 
26
- st.markdown("""
27
- # Machine-Based Item Desirability Ratings
28
- This web application accompanies the paper "*Expanding the Methodological Toolbox: Machine-Based Item Desirability Ratings as an Alternative to Human-Based Ratings*".
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- *Hommel, B. E. (2023). Expanding the methodological toolbox: Machine-based item desirability ratings as an alternative to human-based ratings. Personality and Individual Differences, 213, 112307. https://doi.org/10.1016/j.paid.2023.112307*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
 
 
 
 
 
 
 
32
 
33
- ## What is this research about?
34
- Researchers use personality scales to measure people's traits and behaviors, but biases can affect the accuracy of these scales.
35
- Socially desirable responding is a common bias that can skew results. To overcome this, researchers gather item desirability ratings, e.g., to ensure that questions are neutral.
36
- Recently, advancements in natural language processing have made it possible to use machines to estimate social desirability ratings,
37
- which can provide a viable alternative to human ratings and help researchers, scale developers, and practitioners improve the accuracy of personality scales.
38
- """)
39
 
 
40
 
41
- demo_section.show()
42
- explore_data_section.show()
 
1
+ import time
2
+ import random
3
+ import logging
4
  import streamlit as st
5
  import pandas as pd
6
+
 
 
 
 
 
7
  from dotenv import load_dotenv
 
8
 
9
+ import utils
10
+ import db
11
+ import modeling
12
+ import plots
13
+
14
+ def set_if_not_in_session_state(key, value):
15
+ """Helper function to initialize a session state variable if it doesn't exist."""
16
+ if key not in st.session_state:
17
+ st.session_state[key] = value
18
+
19
+ def initialize():
20
+ """Initialization function to set up logging, load environment variables, and initialize session state variables."""
21
+ load_dotenv()
22
+ logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
23
+
24
+ keys = ['selected_rating', 'collect_data', 'gender_value', 'expert_value', 'show_launch', 'user_id', 'statements', 'current_statement', 'db']
25
+ values = [0, None, None, None, True, random.randint(1, 999_999_999), None, None, None]
26
+
27
+ for key, value in zip(keys, values):
28
+ set_if_not_in_session_state(key, value)
29
+
30
+ connect_to_database()
31
+
32
+ def connect_to_database():
33
+ """Establishes a connection to the database."""
34
+ if st.session_state.db is None:
35
+ credentials_dict = db.load_credentials()
36
+ connection_attempts = 0
37
+
38
+ while st.session_state.db is None and connection_attempts < 3:
39
+ st.session_state.db = db.connect_to_db(credentials_dict)
40
+ if st.session_state.db is None:
41
+ logging.info('Retrying to connect to db...')
42
+ connection_attempts += 1
43
+ time.sleep(1)
44
+ else:
45
+ retrieve_statements()
46
+
47
+ def retrieve_statements():
48
+ """Retrieves statements from the database."""
49
+ retrieval_attempts = 0
50
+
51
+ while st.session_state.statements is None and retrieval_attempts < 3:
52
+ st.session_state.statements = db.get_statements_from_db(st.session_state.db)
53
+ st.session_state.current_statement = db.pick_random(st.session_state.statements)
54
+ if st.session_state.statements is None:
55
+ logging.info('Retrying to retrieve statements from db...')
56
+ retrieval_attempts += 1
57
+ time.sleep(1)
58
+
59
+ def get_user_consent():
60
+ st.markdown("""
61
+ ### Support Future Research
62
+ Additionally, we kindly ask for your agreement to collect anonymous data from your app usage in order to improve future research.
63
+ You may choose to agree or decline this data collection.
64
+ """)
65
+
66
+ collect_data_options = ['Yes, I agree and want to support and help improve this research', 'No']
67
+ collect_data_input = st.radio(
68
+ label='You may choose to agree or decline this data collection.',
69
+ options=collect_data_options,
70
+ horizontal=True,
71
+ label_visibility='collapsed'
72
+ )
73
+ return collect_data_options.index(collect_data_input) == 0
74
+
75
+
76
+ def get_user_info():
77
+ gender_options = ['[Please select]', 'Female', 'Male', 'Other']
78
+ gender_input = st.selectbox(
79
+ label='Please select your gender',
80
+ options=gender_options,
81
+ )
82
+ gender_value = gender_options.index(gender_input)
83
+
84
+ expert_options = [
85
+ '[PLEASE SELECT]',
86
+ 'No, I do not have a background in social or behavioral sciences',
87
+ 'Yes, I have either studied social or behavioral sciences or I am currently a student in this field',
88
+ 'Yes, I have either worked as a researcher in the field of social or behavioral sciences or I have had past experience as a researcher in this area'
89
+ ]
90
+ expert_input = st.selectbox(
91
+ label='Please indicate whether you have any experience or educational background in social or behavioral sciences (e.g., psychology)',
92
+ options=expert_options,
93
+ )
94
+ expert_value = expert_options.index(expert_input)
95
+
96
+ return expert_value, gender_value
97
+
98
+ def get_user_rating(placeholder):
99
+
100
+ with placeholder:
101
+ with st.container():
102
+ st.markdown(f"""
103
+ ### How desirable is the following statement?
104
+ To support future research, rate the following statement according to whether it is socially desirable or undesirable.
105
+ Is it socially desirable or undesirable to endorse the following statement?
106
+ #### <center>\"{st.session_state.current_statement.capitalize()}\"</center>
107
+ """, unsafe_allow_html=True)
108
+
109
+ rating_options = ['[Please select]', 'Very undesirable', 'Undesirable', 'Neutral', 'Desirable', 'Very desirable']
110
+
111
+ selected_rating = st.selectbox(
112
+ label='Rate the statement above according to whether it is socially desirable or undesirable.',
113
+ options=rating_options,
114
+ key='selection'
115
+ )
116
+
117
+ suitability_options = ['No, I\'m just playing around', 'Yes, my input can help improve this research']
118
+
119
+ research_suitability = st.radio(
120
+ label='Is your input suitable for research purposes?',
121
+ options=suitability_options,
122
+ horizontal=True
123
+ )
124
 
125
+ st.session_state.collect_data_optout = st.checkbox(
126
+ label='Don\'t ask me to rate further statements.',
127
+ value=False
128
+ )
129
 
130
+ st.session_state.item_rating = rating_options.index(selected_rating)
131
+ st.session_state.suitability_rating = suitability_options.index(research_suitability)
132
 
133
+ def handle_acceptance(collect_data_value, expert_value, gender_value, message):
134
+ if st.button(label='Accept Disclaimer', type='primary', use_container_width=True):
135
+ if collect_data_value and not (expert_value > 0 and gender_value > 0):
136
+ message.error('Please answer the questions above!')
137
+ else:
138
+ st.session_state.expert_value = expert_value
139
+ st.session_state.gender_value = gender_value
140
+ st.session_state.show_launch = False
141
+ st.session_state.collect_data = collect_data_value
142
+ st.experimental_rerun()
143
 
144
+ def show_launch(placeholder):
145
+ with placeholder:
146
+ with st.container():
147
+ st.divider()
148
+ st.markdown("""
149
+ ## Before Using the App
150
+ ### Disclaimer
151
+ This application is provided as-is, without any warranty or guarantee of any kind, expressed or implied. It is intended for educational, non-commercial use only.
152
+ The developers of this app shall not be held liable for any damages or losses incurred from its use. By using this application, you agree to the terms and conditions
153
+ outlined herein and acknowledge that any commercial use or reliance on its functionality is strictly prohibited.
154
+ """)
155
+
156
+ collect_data_value = False
157
+ if st.session_state.db:
158
+ collect_data_value = get_user_consent()
159
+
160
+ expert_value, gender_value = (0, 0)
161
+ if collect_data_value:
162
+ expert_value, gender_value = get_user_info()
163
+
164
+ message = st.empty()
165
+
166
+ handle_acceptance(collect_data_value, expert_value, gender_value, message)
167
+
168
+ def show_summary(placeholder):
169
+ with placeholder:
170
+ with st.container():
171
+ st.markdown("""
172
+ ## What is the focus of this research?
173
+ Certain biases can affect how people respond to surveys and psychological questionnaires.
174
+ For example, survey respondents may attempt to conceal socially undesirable traits (e.g.,
175
+ being ill-tempered) and endorse statements that cast them in a favorable manner (e.g.,
176
+ being cooperative).
177
+
178
+ Developers of psychological questionnaires hence sometimes aim to ensure that questions
179
+ are neutral, or that a subset of questions is equally (un)desirable. In the past, human
180
+ judges have been tasked with quantifying item desirability. In contrast, the research
181
+ underlying this web application demonstrates that large language models (LLMs) can
182
+ achieve this too!
183
+ """)
184
+
185
+ def handle_demo_input():
186
 
187
+ if st.session_state.collect_data:
188
+ if st.session_state.item_rating > 0:
189
+
190
+ st.session_state.sentiment, st.session_state.desirability = modeling.score_text(st.session_state.input_text)
191
+
192
+ payload = {
193
+ 'user_id': st.session_state.user_id,
194
+ 'gender_value': st.session_state.gender_value,
195
+ 'expert_value': st.session_state.expert_value,
196
+ 'statement': st.session_state.current_statement,
197
+ 'rating': st.session_state.item_rating,
198
+ 'suitability': st.session_state.suitability_rating,
199
+ 'input_text': st.session_state.input_text,
200
+ 'sentiment': st.session_state.sentiment,
201
+ 'desirability': st.session_state.desirability,
202
+ }
203
+ write_to_db_success = db.write_to_db(st.session_state.db, payload)
204
+
205
+ if st.session_state.collect_data_optout:
206
+ st.session_state.collect_data = False
207
+
208
+ if write_to_db_success:
209
+ st.session_state.current_statement = db.pick_random(st.session_state.statements)
210
+ st.session_state.selection = '[Please select]'
211
+ else:
212
+ return None
213
+ else:
214
+ st.session_state.sentiment, st.session_state.desirability = modeling.score_text(st.session_state.input_text)
215
+
216
+
217
+ def show_demo(placeholder):
218
+ with placeholder:
219
+ with st.container():
220
+ st.divider()
221
+ st.markdown("""
222
+ ## Try it yourself!
223
+ Use the text field below to enter a statement that might be part of a psychological
224
+ questionnaire (e.g., "I love a good fight."). Your input will be processed by
225
+ language models, returning a machine-based estimate of item sentiment (i.e., valence)
226
+ and desirability.
227
+
228
+ """)
229
+ modeling.load_model()
230
+
231
+ if 'sentiment' in st.session_state and 'desirability' in st.session_state:
232
+ plots.show_scores(
233
+ sentiment=st.session_state.sentiment,
234
+ desirability=st.session_state.desirability,
235
+ input_text=st.session_state.input_text
236
+ )
237
+
238
+ st.session_state.input_text = st.text_input(
239
+ label='Item text/statement:',
240
+ value='I love a good fight.',
241
+ placeholder='Enter item text'
242
+ )
243
+
244
+ user_rating_placeholder = st.empty()
245
+
246
+ if st.session_state.collect_data:
247
+ get_user_rating(user_rating_placeholder)
248
+
249
+ if st.button(
250
+ label='Evaluate Item Text',
251
+ on_click=handle_demo_input,
252
+ type='primary',
253
+ use_container_width=True
254
+ ):
255
+ if st.session_state.collect_data and st.session_state.item_rating == 0:
256
+ st.error('Please rate the statement presented above!')
257
+
258
+ def show_data(placeholder):
259
+ with placeholder:
260
+ with st.container():
261
+ st.divider()
262
+ st.markdown("""
263
+ ## Explore the data
264
+ Figures show the accuracy in predictions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`-partition data only.
265
+ """)
266
+
267
+ show_covariates = st.checkbox('Show covariates', value=True)
268
+ if show_covariates:
269
+ option = st.selectbox('Group by', options=list(utils.covariate_columns.values()))
270
+ else:
271
+ option = None
272
+
273
+ if 'df' in st.session_state:
274
+ plot = plots.scatter_plot(st.session_state.df, option)
275
+ st.plotly_chart(plot, theme=None, use_container_width=True)
276
+
277
+ def main():
278
+ st.markdown("""
279
+ # Machine-Based Item Desirability Ratings
280
+ This web application demonstrates how item desirability ratings can be obtained with natural language processing ("AI") and accompanies the paper "*Expanding the Methodological Toolbox: Machine-Based Item Desirability Ratings as an Alternative to Human-Based Ratings*".
281
+
282
+ *Hommel, B. E. (2023). Expanding the methodological toolbox: Machine-based item desirability ratings as an alternative to human-based ratings. Personality and Individual Differences, 213, 112307. https://doi.org/10.1016/j.paid.2023.112307*
283
+
284
+ <small>https://www.magnolia-psychometrics.com/</small>
285
+ """, unsafe_allow_html=True)
286
+ placeholder_launch = st.empty()
287
+ placeholder_summary = st.empty()
288
+ placeholder_demo = st.empty()
289
+ placeholder_data = st.empty()
290
 
291
+ if st.session_state.show_launch is True:
292
+ show_launch(placeholder_launch)
293
+ else:
294
+ placeholder_launch = st.empty()
295
+ show_summary(placeholder_summary)
296
+ show_demo(placeholder_demo)
297
+ show_data(placeholder_data)
298
 
 
 
 
 
 
 
299
 
300
+ if __name__ == '__main__':
301
 
302
+ initialize()
303
+ main()
db.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import random
4
+ import logging
5
+ import firebase_admin
6
+ from firebase_admin import credentials, firestore
7
+
8
+ public_creds_path = 'public_creds.json'
9
+
10
+ def load_credentials():
11
+ try:
12
+ with open(public_creds_path) as f:
13
+ credentials_dict = json.load(f)
14
+ secret = {
15
+ 'private_key_id': os.environ.get('private_key_id'),
16
+ 'private_key': os.environ.get('private_key').replace(r'\n', '\n')
17
+ }
18
+ credentials_dict.update(secret)
19
+ return credentials_dict
20
+ except Exception as e:
21
+ logging.error(f'Error while loading credentials: {e}')
22
+ return None
23
+
24
+ def connect_to_db(credentials_dict):
25
+ try:
26
+ cred = credentials.Certificate(credentials_dict)
27
+ if not firebase_admin._apps:
28
+ firebase_admin.initialize_app(cred)
29
+ logging.info('Established connection to db!')
30
+ return firestore.client()
31
+ except Exception as e:
32
+ logging.error(f'Error while connecting to db: {e}')
33
+ return None
34
+
35
+ def get_statements_from_db(db):
36
+ try:
37
+ document = db.collection('ItemDesirability').document('Items')
38
+ statements = document.get().to_dict()['statements']
39
+ logging.info(f'Retrieved {len(statements)} statements from db!')
40
+ return statements
41
+ except Exception as e:
42
+ logging.error(f'Error while retrieving items from db: {e}')
43
+ return None
44
+
45
+ def pick_random(input_list):
46
+ try:
47
+ return random.choice(input_list)
48
+ except Exception as e:
49
+ logging.error(f'Error while picking random statement: {e}')
50
+ return None
51
+
52
+ def write_to_db(db, payload):
53
+
54
+ try:
55
+ collection_ref = db.collection('ItemDesirability')
56
+ doc_ref = collection_ref.document('Responses')
57
+ doc = doc_ref.get()
58
+
59
+ if doc.exists:
60
+ doc_ref.update({
61
+ 'Data': firestore.ArrayUnion([payload])
62
+ })
63
+ else:
64
+ doc_ref.set({
65
+ 'Data': [payload]
66
+ })
67
+ logging.info(f'Sent payload to db!')
68
+ return True
69
+ except Exception as e:
70
+ logging.error(f'Error while sending payload to db: {e}')
71
+ return False
demo_section.py DELETED
@@ -1,312 +0,0 @@
1
- import os
2
- import torch
3
- import json
4
- import time
5
- import random
6
- import streamlit as st
7
- import firebase_admin
8
- import logging
9
- from firebase_admin import credentials, firestore
10
- from dotenv import load_dotenv
11
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
12
- from transformers import pipeline
13
- import plotly.graph_objects as go
14
-
15
- logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)
16
-
17
- load_dotenv()
18
-
19
- def load_credentials():
20
- try:
21
- with open('public_creds.json') as f:
22
- credentials_dict = json.load(f)
23
- secret = {
24
- 'private_key_id': os.environ.get('private_key_id'),
25
- 'private_key': os.environ.get('private_key').replace(r'\n', '\n')
26
- }
27
- credentials_dict.update(secret)
28
- return credentials_dict
29
- except Exception as e:
30
- logging.error(f'Error while loading credentials: {e}')
31
- return None
32
-
33
- def connect_to_db(credentials_dict):
34
- try:
35
- cred = credentials.Certificate(credentials_dict)
36
- if not firebase_admin._apps:
37
- firebase_admin.initialize_app(cred)
38
- logging.info('Established connection to db!')
39
- return firestore.client()
40
- except Exception as e:
41
- logging.error(f'Error while connecting to db: {e}')
42
- return None
43
-
44
- def get_statements_from_db(db):
45
- try:
46
- document = db.collection('ItemDesirability').document('Items')
47
- statements = document.get().to_dict()['statements']
48
- logging.info(f'Retrieved {len(statements)} statements from db!')
49
- return statements
50
- except Exception as e:
51
- logging.error(f'Error while retrieving items from db: {e}')
52
- return None
53
-
54
- def update_db(db, payload):
55
-
56
- try:
57
- collection_ref = db.collection('ItemDesirability')
58
- doc_ref = collection_ref.document('Responses')
59
- doc = doc_ref.get()
60
-
61
- if doc.exists:
62
- doc_ref.update({
63
- 'Data': firestore.ArrayUnion([payload])
64
- })
65
- else:
66
- doc_ref.set({
67
- 'Data': [payload]
68
- })
69
- logging.info(f'Sent payload to db!')
70
- return True
71
- except Exception as e:
72
- logging.error(f'Error while sending payload to db: {e}')
73
- return False
74
-
75
- def pick_random(input_list):
76
- try:
77
- return random.choice(input_list)
78
- except Exception as e:
79
- logging.error(f'Error while picking random statement: {e}')
80
- return None
81
-
82
- def z_score(y, mean=.04853076, sd=.9409466):
83
- return (y - mean) / sd
84
-
85
- def score_text(input_text):
86
- classifier_output = st.session_state.classifier(input_text)
87
- classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
88
- sentiment = classifier_output_dict['positive'] - classifier_output_dict['negative']
89
-
90
- inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
91
-
92
- with torch.no_grad():
93
- score = st.session_state.model(**inputs).logits.squeeze().tolist()
94
- desirability = z_score(score)
95
-
96
- return sentiment, desirability
97
-
98
- def indicator_plot(value, title, value_range, domain):
99
-
100
- plot = go.Indicator(
101
- mode = "gauge+delta",
102
- value = value,
103
- domain = domain,
104
- title = title,
105
- delta = {
106
- 'reference': 0,
107
- 'decreasing': {'color': "#ec4899"},
108
- 'increasing': {'color': "#36def1"}
109
- },
110
- gauge = {
111
- 'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': "black"},
112
- 'bar': {'color': "#4361ee"},
113
- 'bgcolor': "white",
114
- 'borderwidth': 2,
115
- 'bordercolor': "#efefef",
116
- 'steps': [
117
- {'range': [value_range[0], 0], 'color': '#efefef'},
118
- {'range': [0, value_range[1]], 'color': '#efefef'}
119
- ],
120
- 'threshold': {
121
- 'line': {'color': "#4361ee", 'width': 8},
122
- 'thickness': 0.75,
123
- 'value': value
124
- }
125
- }
126
- )
127
-
128
- return plot
129
-
130
- def show_scores(sentiment, desirability, input_text):
131
- p1 = indicator_plot(
132
- value=sentiment,
133
- title=f'Item Sentiment',
134
- value_range=[-1, 1],
135
- domain={'x': [0, .45], 'y': [0, 1]},
136
- )
137
-
138
- p2 = indicator_plot(
139
- value=desirability,
140
- title=f'Item Desirability',
141
- value_range=[-4, 4],
142
- domain={'x': [.55, 1], 'y': [0, 1]}
143
- )
144
-
145
- fig = go.Figure()
146
- fig.add_trace(p1)
147
- fig.add_trace(p2)
148
-
149
- fig.update_layout(
150
- title=dict(text=f'"{input_text}"', font=dict(size=36),yref='paper'),
151
- paper_bgcolor = "white",
152
- font = {'color': "black", 'family': "Arial"})
153
-
154
- st.plotly_chart(fig, theme=None, use_container_width=True)
155
-
156
- st.markdown("""
157
- Item sentiment: Absolute differences between positive and negative sentiment.
158
- Item desirability: z-transformed values, 0 indicated "neutral".
159
- """)
160
-
161
- def update_statement_placeholder(placeholder):
162
-
163
- placeholder.markdown(
164
- body=f"""
165
- Is it socially desirable or undesirable to endorse the following statement?
166
- ### <center>\"{st.session_state.current_statement.capitalize()}\"</center>
167
- """,
168
- unsafe_allow_html=True
169
- )
170
-
171
- def show():
172
- credentials_dict = load_credentials()
173
- connection_attempts = 0
174
-
175
- if 'db' not in st.session_state:
176
- st.session_state.db = None
177
-
178
- while st.session_state.db is None and connection_attempts < 3:
179
- st.session_state.db = connect_to_db(credentials_dict)
180
- if st.session_state.db is None:
181
- logging.info('Retrying to connect to db...')
182
- connection_attempts += 1
183
- time.sleep(1)
184
-
185
-
186
- retrieval_attempts = 0
187
-
188
- if 'statements' not in st.session_state:
189
- st.session_state.statements = None
190
-
191
- if 'current_statement' not in st.session_state:
192
- st.session_state.current_statement = None
193
-
194
- while st.session_state.statements is None and retrieval_attempts < 3:
195
- st.session_state.statements = get_statements_from_db(st.session_state.db)
196
- st.session_state.current_statement = pick_random(st.session_state.statements)
197
- if st.session_state.statements is None:
198
- logging.info('Retrying to retrieve statements from db...')
199
- retrieval_attempts += 1
200
- time.sleep(1)
201
-
202
- st.markdown("""
203
- ## Try it yourself!
204
- Use the text field below to enter a statement that might be part of a psychological questionnaire (e.g., "I love a good fight.").
205
- The left dial indicates how socially desirable it might be to endorse this item.
206
- The right dial indicates sentiment (i.e., valence) as estimated by regular sentiment analysis (using the `cardiffnlp/twitter-xlm-roberta-base-sentiment` model).
207
- """)
208
-
209
- if st.session_state.db:
210
- collect_data = st.checkbox(
211
- label='I want to support and help improve this research.',
212
- value=True
213
- )
214
- else:
215
- collect_data = False
216
-
217
- if st.session_state.db and collect_data:
218
-
219
- statement_placeholder = st.empty()
220
- update_statement_placeholder(statement_placeholder)
221
-
222
- rating_options = ['[Please select]', 'Very undesirable', 'Undesirable', 'Neutral', 'Desirable', 'Very desirable']
223
-
224
- selected_rating = st.selectbox(
225
- label='Rate the statement above according to whether it is socially desirable or undesirable.',
226
- options=rating_options,
227
- index=0
228
- )
229
-
230
- suitability_options = ['No, I\'m just playing around', 'Yes, my input can help improve this research']
231
- research_suitability = st.radio(
232
- label='Is your input suitable for research purposes?',
233
- options=suitability_options,
234
- horizontal=True
235
- )
236
-
237
- with st.spinner('Loading the model might take a couple of seconds...'):
238
-
239
- st.markdown("### Estimate item desirability")
240
-
241
- if os.environ.get('item-desirability'):
242
- model_path = 'magnolia-psychometrics/item-desirability'
243
- else:
244
- model_path = os.getenv('model_path')
245
-
246
- auth_token = os.environ.get('item-desirability') or True
247
-
248
- if 'tokenizer' not in st.session_state:
249
- st.session_state.tokenizer = AutoTokenizer.from_pretrained(
250
- pretrained_model_name_or_path=model_path,
251
- use_fast=True,
252
- use_auth_token=auth_token
253
- )
254
-
255
- if 'model' not in st.session_state:
256
- st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
257
- pretrained_model_name_or_path=model_path,
258
- num_labels=1,
259
- ignore_mismatched_sizes=True,
260
- use_auth_token=auth_token
261
- )
262
-
263
- ## sentiment model
264
- if 'classifier' not in st.session_state:
265
- st.session_state.sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
266
- st.session_state.classifier = pipeline(
267
- task='sentiment-analysis',
268
- model=st.session_state.sentiment_model,
269
- tokenizer=st.session_state.sentiment_model,
270
- use_fast=False,
271
- top_k=3
272
- )
273
-
274
- input_text = st.text_input(
275
- label='Item text/statement:',
276
- value='I love a good fight.',
277
- placeholder='Enter item text'
278
- )
279
-
280
- if st.button(label='Evaluate Item Text', type="primary"):
281
- if collect_data and st.session_state.db:
282
- if selected_rating != rating_options[0]:
283
- item_rating = rating_options.index(selected_rating)
284
- suitability_rating = suitability_options.index(research_suitability)
285
- sentiment, desirability = score_text(input_text)
286
-
287
- payload = {
288
- 'user_id': st.session_state.user_id,
289
- 'statement': st.session_state.current_statement,
290
- 'rating': item_rating,
291
- 'suitability': suitability_rating,
292
- 'input_text': input_text,
293
- 'sentiment': sentiment,
294
- 'desirability': desirability,
295
- }
296
-
297
- update_success = update_db(
298
- db=st.session_state.db,
299
- payload=payload
300
- )
301
-
302
- if update_success:
303
- st.session_state.current_statement = pick_random(st.session_state.statements)
304
- update_statement_placeholder(statement_placeholder)
305
-
306
- show_scores(sentiment, desirability, input_text)
307
-
308
- else:
309
- st.error('Please rate the statement presented above!')
310
- else:
311
- sentiment, desirability = score_text(input_text)
312
- show_scores(sentiment, desirability, input_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logo-130x130.svg ADDED
modeling.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import torch
4
+ import streamlit as st
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+ from transformers import pipeline
7
+
8
+ sentiment_model_path = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
9
+
10
+ def load_model():
11
+
12
+ keys = ['tokenizer', 'model', 'classifier']
13
+
14
+ if any(st.session_state.get(key) is None for key in keys):
15
+ with st.spinner('Loading the model might take a couple of seconds...'):
16
+ try:
17
+ if os.environ.get('item-desirability'):
18
+ model_path = 'magnolia-psychometrics/item-desirability'
19
+ else:
20
+ model_path = os.getenv('model_path')
21
+
22
+ auth_token = os.environ.get('item-desirability') or True
23
+
24
+ st.session_state.tokenizer = AutoTokenizer.from_pretrained(
25
+ pretrained_model_name_or_path=model_path,
26
+ use_fast=True,
27
+ use_auth_token=auth_token
28
+ )
29
+
30
+ st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
31
+ pretrained_model_name_or_path=model_path,
32
+ num_labels=1,
33
+ ignore_mismatched_sizes=True,
34
+ use_auth_token=auth_token
35
+ )
36
+
37
+ st.session_state.classifier = pipeline(
38
+ task='sentiment-analysis',
39
+ model=sentiment_model_path,
40
+ tokenizer=sentiment_model_path,
41
+ use_fast=False,
42
+ top_k=3
43
+ )
44
+
45
+ logging.info('Loaded models and tokenizer!')
46
+
47
+ except Exception as e:
48
+ logging.error(f'Error while loading models/tokenizer: {e}')
49
+
50
+ def z_score(y, mean=.04853076, sd=.9409466):
51
+ return (y - mean) / sd
52
+
53
+ def score_text(input_text):
54
+ with st.spinner('Predicting...'):
55
+ classifier_output = st.session_state.classifier(input_text)
56
+ classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
57
+ sentiment = classifier_output_dict['positive'] - classifier_output_dict['negative']
58
+
59
+ inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
60
+
61
+ with torch.no_grad():
62
+ score = st.session_state.model(**inputs).logits.squeeze().tolist()
63
+ desirability = z_score(score)
64
+
65
+ return sentiment, desirability
plots.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import plotly.graph_objects as go
3
+ import plotly.express as px
4
+
5
+ def indicator_plot(value, title, value_range, domain):
6
+
7
+ plot = go.Indicator(
8
+ mode = 'gauge+delta',
9
+ value = value,
10
+ domain = domain,
11
+ title = title,
12
+ delta = {
13
+ 'reference': 0,
14
+ 'decreasing': {'color': '#ec4899'},
15
+ 'increasing': {'color': '#36def1'}
16
+ },
17
+ gauge = {
18
+ 'axis': {'range': value_range, 'tickwidth': 1, 'tickcolor': 'black'},
19
+ 'bar': {'color': '#4361ee'},
20
+ 'bgcolor': 'white',
21
+ 'borderwidth': 2,
22
+ 'bordercolor': '#efefef',
23
+ 'steps': [
24
+ {'range': [value_range[0], 0], 'color': '#efefef'},
25
+ {'range': [0, value_range[1]], 'color': '#efefef'}
26
+ ],
27
+ 'threshold': {
28
+ 'line': {'color': '#4361ee', 'width': 8},
29
+ 'thickness': 0.75,
30
+ 'value': value
31
+ }
32
+ }
33
+ )
34
+
35
+ return plot
36
+
37
+ def scatter_plot(df, group_var):
38
+
39
+ colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
40
+
41
+ plot = px.scatter(
42
+ df,
43
+ x='Machine-ratings',
44
+ y='Human-ratings',
45
+ color=group_var,
46
+ facet_col='x_group',
47
+ facet_col_wrap=2,
48
+ trendline='ols',
49
+ trendline_scope='trace',
50
+ hover_data={
51
+ 'Text': df.text,
52
+ 'Language': False,
53
+ 'x_group': False,
54
+ 'Human-ratings': ':.2f',
55
+ 'Machine-ratings': ':.2f',
56
+ 'Study': df.study,
57
+ 'Instrument': df.instrument,
58
+ },
59
+ width=400,
60
+ height=400,
61
+ color_discrete_sequence=colors
62
+ )
63
+
64
+ plot.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
65
+ plot.update_layout(
66
+ legend={
67
+ 'orientation':'h',
68
+ 'yanchor': 'bottom',
69
+ 'y': -.30
70
+ })
71
+ plot.update_xaxes(title_standoff = 0)
72
+
73
+ return plot
74
+
75
+ def show_scores(sentiment, desirability, input_text):
76
+ with st.container():
77
+ p1 = indicator_plot(
78
+ value=sentiment,
79
+ title=f'Item Sentiment',
80
+ value_range=[-1, 1],
81
+ domain={'x': [0, .45], 'y': [0, .5]},
82
+ )
83
+
84
+ p2 = indicator_plot(
85
+ value=desirability,
86
+ title=f'Item Desirability',
87
+ value_range=[-4, 4],
88
+ domain={'x': [.55, 1], 'y': [0, .5]}
89
+ )
90
+
91
+ fig = go.Figure()
92
+ fig.add_trace(p1)
93
+ fig.add_trace(p2)
94
+
95
+ fig.update_layout(
96
+ title=dict(text=f'"{input_text}"', font=dict(size=36),yref='paper'),
97
+ paper_bgcolor = 'white',
98
+ font = {'color': 'black', 'family': 'Arial'})
99
+
100
+ st.plotly_chart(fig, theme=None, use_container_width=True)
101
+
102
+ st.markdown("""
103
+ Item sentiment: Absolute differences between positive and negative sentiment.
104
+ Item desirability: z-transformed values, 0 indicates "neutral".
105
+ """)
explore_data_section.py → utils.py RENAMED
@@ -1,7 +1,5 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import plotly.graph_objects as go
4
- import plotly.express as px
5
 
6
  covariate_columns = {
7
  'content_domain': 'Content Domain',
@@ -44,58 +42,4 @@ if 'df' not in st.session_state:
44
  'x': 'Machine-ratings',
45
  }
46
  )
47
- )
48
-
49
- def scatter_plot(df, group_var):
50
-
51
- colors = ['#36def1', '#4361ee'] if group_var else ['#4361ee']
52
-
53
- plot = px.scatter(
54
- df,
55
- x='Machine-ratings',
56
- y='Human-ratings',
57
- color=group_var,
58
- facet_col='x_group',
59
- facet_col_wrap=2,
60
- trendline='ols',
61
- trendline_scope='trace',
62
- hover_data={
63
- 'Text': df.text,
64
- 'Language': False,
65
- 'x_group': False,
66
- 'Human-ratings': ':.2f',
67
- 'Machine-ratings': ':.2f',
68
- 'Study': df.study,
69
- 'Instrument': df.instrument,
70
- },
71
- width=400,
72
- height=400,
73
- color_discrete_sequence=colors
74
- )
75
-
76
- plot.for_each_annotation(lambda a: a.update(text=a.text.split('=')[-1]))
77
- plot.update_layout(
78
- legend={
79
- 'orientation':'h',
80
- 'yanchor': 'bottom',
81
- 'y': -.30
82
- })
83
- plot.update_xaxes(title_standoff = 0)
84
-
85
- return plot
86
-
87
- def show():
88
- st.markdown("""
89
- ## Explore the data
90
- Figures show the accuarcy in precitions of human-rated item desirability by the sentiment model (left) and the desirability model (right), using `test`-partition data only.
91
- """)
92
-
93
- show_covariates = st.checkbox('Show covariates', value=True)
94
- if show_covariates:
95
- option = st.selectbox('Group by', options=list(covariate_columns.values()))
96
- else:
97
- option = None
98
-
99
- if 'df' in st.session_state:
100
- plot = scatter_plot(st.session_state.df, option)
101
- st.plotly_chart(plot, theme=None, use_container_width=True)
 
1
  import streamlit as st
2
  import pandas as pd
 
 
3
 
4
  covariate_columns = {
5
  'content_domain': 'Content Domain',
 
42
  'x': 'Machine-ratings',
43
  }
44
  )
45
+ )