Commit 773685b
Parent(s): 37f480c
init commit

Changed files:
- .env +1 -0
- __pycache__/modeling.cpython-310.pyc +0 -0
- app.py +157 -0
- init.json +3 -0
- logo-130x130.svg +35 -0
- modeling.py +68 -0
.env
ADDED
@@ -0,0 +1 @@
model_path="/nlp/models/published/bandura-v1"
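The .env file only defines model_path, which modeling.py reads at runtime through python-dotenv. A minimal sketch of that lookup, assuming the same variable names as in this commit (the printed value is simply what this .env would yield):

# Illustrative only, not part of the commit: how the model path gets resolved.
import os
from dotenv import load_dotenv

load_dotenv()  # loads .env from the working directory into os.environ

# modeling.load_model() prefers remote_model_path when set, else falls back to model_path
model_path = os.environ.get('remote_model_path') or os.getenv('model_path')
print(model_path)  # -> /nlp/models/published/bandura-v1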
__pycache__/modeling.cpython-310.pyc
ADDED
Binary file (2.56 kB)
app.py
ADDED
@@ -0,0 +1,157 @@
import streamlit as st
import pandas as pd
import logging
import json
from dotenv import load_dotenv

import modeling

def show_launch(placeholder):
    with placeholder.container():
        st.divider()
        st.markdown("""
            ## Before Using the App
            ### Disclaimer
            This application is provided as-is, without any warranty or guarantee of any kind, expressed or implied. It is intended for educational, non-commercial use only.
            The developers of this app shall not be held liable for any damages or losses incurred from its use. By using this application, you agree to the terms and conditions
            outlined herein and acknowledge that any commercial use or reliance on its functionality is strictly prohibited.
        """, unsafe_allow_html=True)

        button_placeholder = st.empty()

        if button_placeholder.button(label='Accept Disclaimer', type='primary', use_container_width=True):
            st.session_state.show_launch = False
            placeholder.empty()
            button_placeholder.empty()

def show_demo(placeholder):

    with placeholder:
        with st.container():
            st.divider()
            st.markdown("""
                ## Try it yourself!
                Use the input fields provided below to create items aimed at
                assessing a particular psychological construct (e.g., a personality
                trait). If desired, employ the prefix option to generate items
                that begin with a predetermined string. To manage the diversity
                of the output, various sampling strategies may be applied.
                For further information on these strategies, please refer to the
                accompanying paper.
            """)

            modeling.load_model()

            sampling_options = ['Greedy Search', 'Beam Search', 'Multinomial Sampling']
            sampling_input = st.radio('Sampling', options=sampling_options, index=2, horizontal=True)
            left_col, right_col = st.columns([1, 1])

            with left_col:
                prefix_input = st.text_input('Prefix', '')
                construct_input = st.text_input('Construct', 'Pessimism')

            with right_col:
                # Greedy search: a single deterministic sequence
                if sampling_options.index(sampling_input) == 0:
                    num_beams = 1
                    num_return_sequences = 1
                    temperature = 1
                    top_k = 0
                    top_p = 1

                # Beam search: explore several beams, return a subset
                if sampling_options.index(sampling_input) == 1:
                    num_beams = st.slider('Number of Search Beams', min_value=1, max_value=10, value=3, step=1)
                    num_return_sequences = st.slider('Number of Beams to Return', min_value=1, max_value=10, value=2, step=1)
                    temperature = 1
                    top_k = 0
                    top_p = 1

                # Multinomial sampling: temperature / top-k / top-p controls
                if sampling_options.index(sampling_input) == 2:
                    num_beams = 1
                    num_return_sequences = 1
                    temperature = st.slider('Temperature', min_value=0.1, max_value=1.5, value=1.0, step=0.1)
                    top_k = st.slider('Top k (0 = disabled)', min_value=0, max_value=1000, value=40, step=1)
                    top_p = st.slider('Top p (0 = disabled)', min_value=0.0, max_value=1.0, value=0.95, step=0.05)

            message = st.empty()

            if st.button(label='Generate Item', type='primary', use_container_width=True):
                if num_return_sequences <= num_beams:
                    if len(construct_input) > 0:

                        kwargs = {
                            'num_return_sequences': num_return_sequences,
                            'num_beams': num_beams,
                            'do_sample': sampling_options.index(sampling_input) == 2,
                            'temperature': temperature,
                            'top_k': top_k,
                            'top_p': top_p
                        }

                        item_stems = modeling.generate_items(construct_input, prefix_input, **kwargs)
                        st.session_state.outputs.append({'construct': construct_input, 'item': item_stems})
                    else:
                        message.error('You have to enter a construct to proceed with item generation!')
                else:
                    message.error('You cannot return more beams than you search for!')

            if len(st.session_state.outputs) > 0:
                tab1, tab2 = st.tabs(["Generated Items", "Details on last prompt"])

                with tab1:
                    for output in st.session_state.outputs:
                        placeholder_outputs = st.empty()

                with tab2:
                    pass

                # One row per generated item, newest prompts shown first
                df = pd.DataFrame(st.session_state.outputs).explode(column='item').reset_index()
                placeholder_outputs = st.dataframe(df.sort_values(by='index', ascending=False), use_container_width=True)

def initialize():
    load_dotenv()
    logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO)

    if 'state_loaded' not in st.session_state:
        st.session_state['state_loaded'] = True
        with open('init.json') as json_data:
            st.session_state.update(json.load(json_data))

def main():
    st.set_page_config(page_title='Construct-Specific Automatic Item Generation')

    col1, col2 = st.columns([2, 5])
    with col1:
        st.image('logo-130x130.svg')

    with col2:
        st.markdown("# Construct-Specific Automatic Item Generation")

    st.markdown("""
        This web application showcases item generation for psychological scale development
        using natural language processing ("AI"), accompanying the paper
        "Transformer-Based Deep Neural Language Modeling for Construct-Specific Automatic Item Generation".

        Paper (Open Access): https://link.springer.com/article/10.1007/s11336-021-09823-9

        Data: https://osf.io/rhe9w/

        Cite:<br> Hommel, B. E., Wollang, F.-J. M., Kotova, V., Zacher, H., & Schmukle, S. C. (2022). Transformer-Based Deep Neural Language Modeling for Construct-Specific Automatic Item Generation. Psychometrika, 87(2), 749–772. https://doi.org/10.1007/s11336-021-09823-9

        Twitter/X: https://twitter.com/BjoernHommel

        The web application is maintained by [magnolia psychometrics](https://www.magnolia-psychometrics.com/).
    """, unsafe_allow_html=True)

    placeholder_launch = st.empty()
    placeholder_demo = st.empty()

    if 'disclaimer' not in st.session_state:
        show_launch(placeholder_launch)
        st.session_state['disclaimer'] = True
    else:
        show_demo(placeholder_demo)

if __name__ == '__main__':
    initialize()
    main()
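For reference, the results table at the end of show_demo flattens the accumulated st.session_state.outputs (one dict per prompt, each holding a list of item stems) into one row per generated item via pandas' explode. A standalone sketch with invented example outputs:

# Illustrative only: the DataFrame step from show_demo(), with made-up generation results.
import pandas as pd

outputs = [
    {'construct': 'Pessimism', 'item': ['i usually expect the worst.', 'i rarely count on things going well.']},
    {'construct': 'Optimism', 'item': ['i look on the bright side.']},
]

df = pd.DataFrame(outputs).explode(column='item').reset_index()
# 'index' identifies the originating prompt, so the newest prompt sorts to the top
print(df.sort_values(by='index', ascending=False))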
init.json
ADDED
@@ -0,0 +1,3 @@
{
    "outputs": []
}
logo-130x130.svg
ADDED
modeling.py
ADDED
@@ -0,0 +1,68 @@
import os
import logging
import torch
import streamlit as st
from transformers import pipeline
from transformers import GPT2Tokenizer, GPT2LMHeadModel

def load_model():

    keys = ['generator']

    if any(st.session_state.get(key) is None for key in keys):

        with st.spinner('Loading the model might take a couple of seconds...'):
            try:
                if os.environ.get('remote_model_path'):
                    model_path = os.environ.get('remote_model_path')
                else:
                    model_path = os.getenv('model_path')

                st.session_state.generator = pipeline(task='text-generation', model=model_path, tokenizer=model_path)

                logging.info('Loaded model and tokenizer!')

            except Exception as e:
                logging.error(f'Error while loading model/tokenizer: {e}')

def generate_items(constructs, prefix='', **kwargs):

    with st.spinner(f'Generating item(s) for `{constructs}`...'):
        construct_sep = '#'
        item_sep = '@'

        constructs = constructs if isinstance(constructs, list) else [constructs]
        encoded_constructs = construct_sep + construct_sep.join([x.lower() for x in constructs])
        encoded_prompt = f'{encoded_constructs}{item_sep}{prefix}'

        outputs = st.session_state.generator(encoded_prompt, **kwargs)
        truncate_str = f'{encoded_constructs}{item_sep}'

        item_stems = []
        for output in outputs:
            item_stems.append(output['generated_text'].replace(truncate_str, ''))

        return item_stems

def get_next_tokens(prefix, breadth=5):
    # Load tokenizer and model
    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    model = GPT2LMHeadModel.from_pretrained('gpt2')

    # Encode the prefix
    inputs = tokenizer(prefix, return_tensors='pt')

    # Get the model's predictions
    with torch.no_grad():
        outputs = model(**inputs)

    logits = outputs.logits
    # Only consider the last token for next-token predictions
    last_token_logits = logits[:, -1, :]

    # Get the indices of the top 'breadth' possible next tokens
    top_tokens = torch.topk(last_token_logits, breadth, dim=1).indices.tolist()[0]

    # Decode the token IDs to tokens
    next_tokens = [tokenizer.decode([token_id]) for token_id in top_tokens]

    return next_tokens
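generate_items encodes its prompt as '#<construct>@<prefix>' and strips that marker from every generated sequence. Because load_model and generate_items cache the pipeline in st.session_state, they are meant to run inside the Streamlit app; the sketch below reproduces the same encoding and truncation with a plain pipeline ('gpt2' is a stand-in checkpoint, not the fine-tuned model referenced in .env):

# Standalone sketch of the prompt-encoding convention used by generate_items()
# ('gpt2' is a placeholder; the app loads the fine-tuned model referenced in .env).
from transformers import pipeline

generator = pipeline(task='text-generation', model='gpt2', tokenizer='gpt2')

construct_sep, item_sep = '#', '@'
constructs = ['pessimism']
prefix = 'I'

encoded_constructs = construct_sep + construct_sep.join(c.lower() for c in constructs)
encoded_prompt = f'{encoded_constructs}{item_sep}{prefix}'   # '#pessimism@I'

outputs = generator(encoded_prompt, do_sample=True, top_p=0.95, num_return_sequences=2)

# Drop the construct marker but keep the prefix, as modeling.generate_items() does.
truncate_str = f'{encoded_constructs}{item_sep}'
item_stems = [o['generated_text'].replace(truncate_str, '') for o in outputs]
print(item_stems)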