Commit 773685b
Parent(s): 37f480c
init commit

Changed files:
- .env +1 -0
- __pycache__/modeling.cpython-310.pyc +0 -0
- app.py +157 -0
- init.json +3 -0
- logo-130x130.svg +35 -0
- modeling.py +68 -0
.env
ADDED
@@ -0,0 +1 @@
model_path="/nlp/models/published/bandura-v1"
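The .env file only defines model_path, which modeling.py reads at runtime through python-dotenv. A minimal sketch of that lookup, assuming the same variable names as in this commit (the printed value is simply what this .env would yield):

# Illustrative only, not part of the commit: how the model path gets resolved.
import os
from dotenv import load_dotenv

load_dotenv()  # loads .env from the working directory into os.environ

# modeling.load_model() prefers remote_model_path when set, else falls back to model_path
model_path = os.environ.get('remote_model_path') or os.getenv('model_path')
print(model_path)  # -> /nlp/models/published/bandura-v1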
__pycache__/modeling.cpython-310.pyc
ADDED
Binary file (2.56 kB)
app.py
ADDED
@@ -0,0 +1,157 @@
import streamlit as st
import pandas as pd
import logging
import json
from dotenv import load_dotenv

import modeling

def show_launch(placeholder):
    with placeholder.container():
        st.divider()
        st.markdown("""
            ## Before Using the App
            ### Disclaimer
            This application is provided as-is, without any warranty or guarantee of any kind, expressed or implied. It is intended for educational, non-commercial use only.
            The developers of this app shall not be held liable for any damages or losses incurred from its use. By using this application, you agree to the terms and conditions
            outlined herein and acknowledge that any commercial use or reliance on its functionality is strictly prohibited.
        """, unsafe_allow_html=True)

        button_placeholder = st.empty()

        if button_placeholder.button(label='Accept Disclaimer', type='primary', use_container_width=True):
            st.session_state.show_launch = False
            placeholder.empty()
            button_placeholder.empty()

def show_demo(placeholder):

    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
                ## Try it yourself!
                Use the input fields provided below to create items aimed at
                assessing a particular psychological construct (e.g., a personality
                trait). If desired, employ the prefix option to generate items
                that begin with a predetermined string. To manage the diversity
                of the output, various sampling strategies may be applied.
                For further information on these strategies, please refer to the
                accompanying paper.
            """)

            modeling.load_model()

            sampling_options = ['Greedy Search', 'Beam Search', 'Multinomial Sampling']
            sampling_input = st.radio('Sampling', options=sampling_options, index=2, horizontal=True)
            left_col, right_col = st.columns([1, 1])

            with left_col:
                prefix_input = st.text_input('Prefix', '')
                construct_input = st.text_input('Construct', 'Pessimism')

            with right_col:
                # Greedy search: a single deterministic sequence
                if sampling_options.index(sampling_input) == 0:
                    num_beams = 1
                    num_return_sequences = 1
                    temperature = 1
                    top_k = 0
                    top_p = 1

                # Beam search: explore several beams, return a subset
                if sampling_options.index(sampling_input) == 1:
                    num_beams = st.slider('Number of Search Beams', min_value=1, max_value=10, value=3, step=1)
                    num_return_sequences = st.slider('Number of Beams to Return', min_value=1, max_value=10, value=2, step=1)
                    temperature = 1
                    top_k = 0
                    top_p = 1

                # Multinomial sampling: temperature / top-k / top-p controls
                if sampling_options.index(sampling_input) == 2:
                    num_beams = 1
                    num_return_sequences = 1
                    temperature = st.slider('Temperature', min_value=0.1, max_value=1.5, value=1.0, step=0.1)
                    top_k = st.slider('Top k (0 = disabled)', min_value=0, max_value=1000, value=40, step=1)
                    top_p = st.slider('Top p (0 = disabled)', min_value=0.0, max_value=1.0, value=0.95, step=0.05)

            message = st.empty()

            if st.button(label='Generate Item', type='primary', use_container_width=True):
                if num_return_sequences <= num_beams:
                    if len(construct_input) > 0:

                        kwargs = {
                            'num_return_sequences': num_return_sequences,
                            'num_beams': num_beams,
                            'do_sample': sampling_options.index(sampling_input) == 2,
                            'temperature': temperature,
                            'top_k': top_k,
                            'top_p': top_p
                        }

                        item_stems = modeling.generate_items(construct_input, prefix_input, **kwargs)
                        st.session_state.outputs.append({'construct': construct_input, 'item': item_stems})
                    else:
                        message.error('You have to enter a construct to proceed with item generation!')
                else:
                    message.error('You cannot return more beams than you search for!')

            if len(st.session_state.outputs) > 0:
                tab1, tab2 = st.tabs(["Generated Items", "Details on last prompt"])

                with tab1:
                    for output in st.session_state.outputs:
                        placeholder_outputs = st.empty()

                with tab2:
                    pass

                # One row per generated item, newest prompts shown first
                df = pd.DataFrame(st.session_state.outputs).explode(column='item').reset_index()
                placeholder_outputs = st.dataframe(df.sort_values(by='index', ascending=False), use_container_width=True)

def initialize():
    load_dotenv()
    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)

    if 'state_loaded' not in st.session_state:
        st.session_state['state_loaded'] = True
        with open('init.json') as json_data:
            st.session_state.update(json.load(json_data))

def main():
    st.set_page_config(page_title='Construct-Specific Automatic Item Generation')

    col1, col2 = st.columns([2, 5])
    with col1:
        st.image('logo-130x130.svg')

    with col2:
        st.markdown("# Construct-Specific Automatic Item Generation")

    st.markdown("""
        This web application showcases item generation for psychological scale development
        using natural language processing ("AI"), accompanying the paper
        "Transformer-Based Deep Neural Language Modeling for Construct-Specific Automatic Item Generation".

        Paper (Open Access): https://link.springer.com/article/10.1007/s11336-021-09823-9

        Data: https://osf.io/rhe9w/

        Cite:<br> Hommel, B. E., Wollang, F.-J. M., Kotova, V., Zacher, H., & Schmukle, S. C. (2022). Transformer-Based Deep Neural Language Modeling for Construct-Specific Automatic Item Generation. Psychometrika, 87(2), 749–772. https://doi.org/10.1007/s11336-021-09823-9

        Twitter/X: https://twitter.com/BjoernHommel

        The web application is maintained by [magnolia psychometrics](https://www.magnolia-psychometrics.com/).
    """, unsafe_allow_html=True)

    placeholder_launch = st.empty()
    placeholder_demo = st.empty()

    if 'disclaimer' not in st.session_state:
        show_launch(placeholder_launch)
        st.session_state['disclaimer'] = True
    else:
        show_demo(placeholder_demo)

if __name__ == '__main__':
    initialize()
    main()
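For reference, the results table at the end of show_demo flattens the accumulated st.session_state.outputs (one dict per prompt, each holding a list of item stems) into one row per generated item via pandas' explode. A standalone sketch with invented example outputs:

# Illustrative only: the DataFrame step from show_demo(), with made-up generation results.
import pandas as pd

outputs = [
    {'construct': 'Pessimism', 'item': ['i usually expect the worst.', 'i rarely count on things going well.']},
    {'construct': 'Optimism', 'item': ['i look on the bright side.']},
]

df = pd.DataFrame(outputs).explode(column='item').reset_index()
# 'index' identifies the originating prompt, so the newest prompt sorts to the top
print(df.sort_values(by='index', ascending=False))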
init.json
ADDED
@@ -0,0 +1,3 @@
{
    "outputs": []
}
logo-130x130.svg
ADDED
modeling.py
ADDED
@@ -0,0 +1,68 @@
import os
import logging
import torch
import streamlit as st
from transformers import pipeline
from transformers import GPT2Tokenizer, GPT2LMHeadModel

def load_model():

    keys = ['generator']

    if any(st.session_state.get(key) is None for key in keys):

        with st.spinner('Loading the model might take a couple of seconds...'):
            try:
                if os.environ.get('remote_model_path'):
                    model_path = os.environ.get('remote_model_path')
                else:
                    model_path = os.getenv('model_path')

                st.session_state.generator = pipeline(task='text-generation', model=model_path, tokenizer=model_path)

                logging.info('Loaded model and tokenizer!')

            except Exception as e:
                logging.error(f'Error while loading model/tokenizer: {e}')

def generate_items(constructs, prefix='', **kwargs):

    with st.spinner(f'Generating item(s) for `{constructs}`...'):
        construct_sep = '#'
        item_sep = '@'

        constructs = constructs if isinstance(constructs, list) else [constructs]
        encoded_constructs = construct_sep + construct_sep.join([x.lower() for x in constructs])
        encoded_prompt = f'{encoded_constructs}{item_sep}{prefix}'

        outputs = st.session_state.generator(encoded_prompt, **kwargs)
        truncate_str = f'{encoded_constructs}{item_sep}'

        item_stems = []
        for output in outputs:
            item_stems.append(output['generated_text'].replace(truncate_str, ''))

        return item_stems

def get_next_tokens(prefix, breadth=5):
    # Load tokenizer and model
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')

    # Encode the prefix
    inputs = tokenizer(prefix, return_tensors='pt')

    # Get the model's predictions
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    # Only consider the last token for next-token predictions
    last_token_logits = logits[:, -1, :]

    # Get the indices of the top 'breadth' possible next tokens
    top_tokens = torch.topk(last_token_logits, breadth, dim=1).indices.tolist()[0]

    # Decode the token IDs to tokens
    next_tokens = [tokenizer.decode([token_id]) for token_id in top_tokens]

    return next_tokens
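generate_items encodes its prompt as '#<construct>@<prefix>' and strips that marker from every generated sequence. Because load_model and generate_items cache the pipeline in st.session_state, they are meant to run inside the Streamlit app; the sketch below reproduces the same encoding and truncation with a plain pipeline ('gpt2' is a stand-in checkpoint, not the fine-tuned model referenced in .env):

# Standalone sketch of the prompt-encoding convention used by generate_items()
# ('gpt2' is a placeholder; the app loads the fine-tuned model referenced in .env).
from transformers import pipeline

generator = pipeline(task='text-generation', model='gpt2', tokenizer='gpt2')

construct_sep, item_sep = '#', '@'
constructs = ['pessimism']
prefix = 'I'

encoded_constructs = construct_sep + construct_sep.join(c.lower() for c in constructs)
encoded_prompt = f'{encoded_constructs}{item_sep}{prefix}'   # '#pessimism@I'

outputs = generator(encoded_prompt, do_sample=True, top_p=0.95, num_return_sequences=2)

# Drop the construct marker but keep the prefix, as modeling.generate_items() does.
truncate_str = f'{encoded_constructs}{item_sep}'
item_stems = [o['generated_text'].replace(truncate_str, '') for o in outputs]
print(item_stems)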