mod_prediction_v

Sleeping

App Files Files Community

emvecchi committed on Jul 16

Commit

3848935

•

1 Parent(s): 1ba5f54

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -43

app.py CHANGED Viewed

@@ -4,12 +4,15 @@ from dataclasses import dataclass, field
 from typing import List, Optional, Dict
 from PIL import Image
 import pandas as pd
 import streamlit as st
 from huggingface_hub import HfFileSystem
 import streamlit.components.v1 as components
 @dataclass
 class Field:
     type: str
@@ -23,19 +26,24 @@ class Field:
     other_params: Optional[Dict[str, object]] = field(default_factory=lambda: {})
 # Function to get user ID from URL
-def get_user_id_from_url():
-    user_id = st.query_params.get("user_id", "")
     return user_id
-HF_TOKEN = os.environ.get("HF_TOKEN_WRITE2")
-print("is none?", HF_TOKEN is None)
-hf_fs = HfFileSystem(token=HF_TOKEN)
 ########################################################################################
 # CHANGE THE FOLLOWING VARIABLES ACCORDING TO YOUR NEEDS
 input_repo_path = 'datasets/emvecchi/annotation'
-output_repo_path = 'datasets/emvecchi/annotation'
 to_annotate_file_name = 'to_annotate.csv'  # CSV file to annotate
 COLS_TO_SAVE = ['comment_id','comment','confidence_score']
@@ -128,6 +136,21 @@ Please indicate, in the box below, that you are at least 18 years old, have read
 '''
 guidelines_text = 'Please read <a href="https://acrobat.adobe.com/id/urn:aaid:sc:EU:1a1347b0-3423-49ee-aa28-87679f8a69c0">the guidelines</a>'
 study_code = 'CE552C7F'
 intro_fields: List[Field] = [
     Field(type="container", title="**Introductory Questions**", children=[
@@ -201,10 +224,23 @@ SHOW_HELP_ICON = False
 SHOW_VALIDATION_ERROR_MESSAGE = True
 ########################################################################################
-def read_data(_path):
-    with hf_fs.open(input_repo_path + '/' + _path) as f:
-        return pd.read_csv(f)
 def read_saved_data():
     _path = get_path()
@@ -215,17 +251,21 @@ def read_saved_data():
             except json.JSONDecodeError as e:
                 print(e)
     return None
 # Write a remote file
 def save_data(data):
-    hf_fs.mkdir(f"{output_repo_path}/{data['user_id']}")
     with hf_fs.open(f"{output_repo_path}/{get_path()}", "w") as f:
         f.write(json.dumps(data))
 def get_path():
-    return f"{st.session_state.user_id}/{st.session_state.current_index}.json"
 def display_image(image_path):
@@ -276,15 +316,13 @@ def show_field(f: Field, index: int, data_collected):
         st.session_state.following_mandatory = False
         match f.type:
             case 'input_col':
-                st.write(f.title)
-                if f.name == 'image_name':
                     st.write(f.title)
-                    image_name = st.session_state.data.iloc[index][f.name]
-                    if image_name:  # Ensure the image name is not empty
-                        image_path = os.path.join(input_repo_path, 'images', image_name)
-                        display_image(image_path)
-                else:
-                    st.write(st.session_state.data.iloc[index][f.name])
             case 'markdown':
                 st.markdown(f.title)
             case 'expander' | 'container':
@@ -293,6 +331,8 @@ def show_field(f: Field, index: int, data_collected):
                         st.markdown(f.title)
                     for child in f.children:
                         show_field(child, index, data_collected)
     else:
         key = f.name + str(index)
         st.session_state.data_inputs_keys.append(f.name)
@@ -302,17 +342,32 @@ def show_field(f: Field, index: int, data_collected):
             f.title = f'**{f.title}**\n\n{f.help}' if f.help else f.title
         validation_error = False
-        if f.mandatory or st.session_state.following_mandatory:
-            # form is not displayed for first time
-            if st.session_state.form_displayed == st.session_state.current_index:
                 if st.session_state[key] == INPUT_FIELD_DEFAULT_VALUES[f.type]:
                     st.session_state.valid = False
                     validation_error = True
                 elif f.following_mandatory_values and st.session_state[key] in f.following_mandatory_values:
                     st.session_state.following_mandatory = True
             f.title += " :red[* required!]" if (validation_error and not SHOW_VALIDATION_ERROR_MESSAGE) else' :red[*]'
         f.help = None
         match f.type:
             case 'checkbox':
                 st.checkbox(f.title,
@@ -362,6 +417,7 @@ def show_field(f: Field, index: int, data_collected):
                 st.text_area(f.title, key=key, value=value, max_chars=None)
         if validation_error:
             st.error(f"Mandatory field")
@@ -377,24 +433,31 @@ def show_fields(fields: List[Field]):
     submitted = st.form_submit_button("Submit")
     if submitted:
-        if not st.session_state.valid:
             st.error("Please fill in all mandatory fields")
             # st.rerun() # filed-out values are not shown otherwise
         else:
             with st.spinner(text="saving"):
-                save_data({
-                    'user_id': st.session_state.user_id,
-                    'index': st.session_state.current_index,
-                    **(st.session_state.data.iloc[index][COLS_TO_SAVE].to_dict() if index >= 0 else {}),
-                    **{k: st.session_state[k+str(index)] for k in st.session_state.data_inputs_keys}
-                })
             st.success("Feedback submitted successfully!")
             navigate(1)
     st.session_state.form_displayed = st.session_state.current_index
-#st.set_page_config(layout='wide')
 # Title of the app
 st.title("Moderator Intervention Prediction")
@@ -404,19 +467,46 @@ st.markdown(
 div[data-testid="stMarkdownContainer"] > p {
     font-size: 1rem;
 }
-    section.main > div {max-width:75rem}
     </style>
     """, unsafe_allow_html=True)
 # Load the data to annotate
 if 'data' not in st.session_state:
-    st.session_state.data = read_data(to_annotate_file_name)
-# Initialize the current index
 if 'current_index' not in st.session_state:
-    st.session_state.current_index = -3
     st.session_state.form_displayed = -3
 def add_validated_submit(fields, message):
     st.session_state.form_displayed = st.session_state.current_index
@@ -426,25 +516,19 @@ def add_validated_submit(fields, message):
         else:
             navigate(1)
 def add_checked_submit():
     check = st.checkbox('I agree', key='consent')
     add_validated_submit([check], "Please agree to give your consent to proceed")
-def add_annotation_guidelines():
-    st.write(f"username is {st.session_state.user_id}")
-    st.markdown(
-        "<details open><summary>Annotation Guidelines</summary>" + guidelines_text + "</details>"
-        , unsafe_allow_html=True)
 if st.session_state.current_index == -3:
     with st.form("data_form"):
         st.markdown(consent_text)
         add_checked_submit()
 elif st.session_state.current_index == -2:
-    user_id_from_url = get_user_id_from_url()
-    if user_id_from_url:
-        st.session_state.user_id = user_id_from_url
         navigate(1)
     else:
         with st.form("data_form"):

 from typing import List, Optional, Dict
 from PIL import Image
+import numpy as np
 import pandas as pd
 import streamlit as st
+from fsspec.implementations.local import LocalFileSystem
 from huggingface_hub import HfFileSystem
 import streamlit.components.v1 as components
 @dataclass
 class Field:
     type: str
     other_params: Optional[Dict[str, object]] = field(default_factory=lambda: {})
 # Function to get user ID from URL
+def get_param_from_url(param):
+    user_id = st.query_params.get(param, "")
     return user_id
 ########################################################################################
 # CHANGE THE FOLLOWING VARIABLES ACCORDING TO YOUR NEEDS
+# Storage backend: 'local' or 'hf'. 'hf' is the Hugging Face file system, which
+# limits the number of accesses per hour. This value is read further down when
+# the file-system object is created, so the name must be spelled `filesystem`.
+filesystem = 'hf'
+# path to repo or local file system TODO rename
 input_repo_path = 'datasets/emvecchi/annotation'
+output_repo_path = 'datasets/emvecchi/annotation'
+# filesystem = 'local'
+# path to repo or local file system
+# input_repo_path = '/data/mod-gen-eval-pref'
+# output_repo_path = '/data/mod-gen-eval-pref'
 to_annotate_file_name = 'to_annotate.csv'  # CSV file to annotate
 COLS_TO_SAVE = ['comment_id','comment','confidence_score']
 '''
 guidelines_text = 'Please read <a href="https://acrobat.adobe.com/id/urn:aaid:sc:EU:1a1347b0-3423-49ee-aa28-87679f8a69c0">the guidelines</a>'
 study_code = 'CE552C7F'
+# failed_sanity_check_code = 'C102EK63'  # screened-out code
+failed_sanity_check_code = 'C15RGLJA'
+# Prolific completion link carrying the study code.
+redirect_url = f'https://app.prolific.com/submissions/complete?cfc={study_code}'
+# Mandatory radio question about the guidelines. A response outside
+# 'accepted_values' sets the `unacceptable_response` flag when the form is
+# validated (presumably 0 refers to the first label -- confirm against the
+# radio widget's stored value).
+annotation_guidelines_fields: List[Field] = [
+    Field(
+        name="annotation_guidelines",
+        type="radio",
+        title="Did you read the guidelines?",
+        mandatory=True,
+        other_params={
+            'labels': [
+                'Yes, in detail, and I understand the study',
+                'Yes, in detail, but still confused',
+                'Yes, I skimmed it',
+                'I will read it later',
+                'No, not interested in reading them',
+                'I can not open the link',
+            ],
+            'accepted_values': [0],
+        },
+    ),
+]
 intro_fields: List[Field] = [
     Field(type="container", title="**Introductory Questions**", children=[
 SHOW_VALIDATION_ERROR_MESSAGE = True
 ########################################################################################
+# Build the file-system object used for all reads and writes below.
+if filesystem != 'hf':
+    # Any value other than 'hf' selects the local file system.
+    hf_fs = LocalFileSystem()
+else:
+    HF_TOKEN = os.environ.get("HF_TOKEN_WRITE2")
+    print("is none?", HF_TOKEN is None)
+    hf_fs = HfFileSystem(token=HF_TOKEN)
+def get_start_index():
+    """Return the index at which this session should start.
+
+    Returns -3 (the consent step) when no user id is known yet or when the
+    user has no output directory. Otherwise the index is derived from the
+    number of entries already stored in the user's output directory.
+    """
+    # Without a user id there is no per-user directory to inspect, and
+    # get_base_path() would fail on the missing session attribute.
+    if not st.session_state.get('user_id'):
+        return -3
+    user_dir = output_repo_path + '/' + get_base_path()
+    if hf_fs.exists(user_dir):
+        # NOTE(review): the "- 2" offset presumably accounts for files saved
+        # at negative (pre-annotation) indices -- confirm against what is
+        # actually written for those steps.
+        return len(hf_fs.ls(user_dir)) - 2
+    return -3
+def read_data():
+    """Open the CSV to annotate under the input path and return it as a DataFrame."""
+    csv_path = f"{input_repo_path}/{to_annotate_file_name}"
+    with hf_fs.open(csv_path) as f:
         return pd.read_csv(f)
 def read_saved_data():
     _path = get_path()
             except json.JSONDecodeError as e:
                 print(e)
     return None
 # Write a remote file
 def save_data(data):
+    """Write ``data`` as JSON to the current user's file for the current index.
+
+    The user's output directory is created first if it does not exist yet.
+    """
+    user_dir = f"{output_repo_path}/{get_base_path()}"
+    if not hf_fs.exists(user_dir):
+        hf_fs.mkdir(user_dir)
     with hf_fs.open(f"{output_repo_path}/{get_path()}", "w") as f:
         f.write(json.dumps(data))
+def get_base_path():
+    """Return the per-user directory name, which is the session's user id."""
+    user_id = st.session_state.user_id
+    return f"{user_id}"
 def get_path():
+    """Return ``<user dir>/<current index>.json`` for the current session."""
+    base = get_base_path()
+    return f"{base}/{st.session_state.current_index}.json"
 def display_image(image_path):
         st.session_state.following_mandatory = False
         match f.type:
             case 'input_col':
+                value = st.session_state.data.iloc[index][f.name]
+                if value and value is not np.nan:
                     st.write(f.title)
+                    if f.name == 'image_name':
+                        display_image(os.path.join(input_repo_path, 'images', value))
+                    else:
+                        st.write(value)
             case 'markdown':
                 st.markdown(f.title)
             case 'expander' | 'container':
                         st.markdown(f.title)
                     for child in f.children:
                         show_field(child, index, data_collected)
+            case 'skip_checkbox':
+                st.checkbox(f.title, key=f.name, value=False)
     else:
         key = f.name + str(index)
         st.session_state.data_inputs_keys.append(f.name)
             f.title = f'**{f.title}**\n\n{f.help}' if f.help else f.title
         validation_error = False
+        # form is not displayed for first time
+        if st.session_state.form_displayed == st.session_state.current_index:
+            if f.mandatory or st.session_state.following_mandatory:
                 if st.session_state[key] == INPUT_FIELD_DEFAULT_VALUES[f.type]:
                     st.session_state.valid = False
                     validation_error = True
                 elif f.following_mandatory_values and st.session_state[key] in f.following_mandatory_values:
                     st.session_state.following_mandatory = True
+            # check for any unaccepted values
+            if (
+                    (f.other_params.get('accepted_values') and
+                     value not in f.other_params.get('accepted_values')) or
+                    (f.other_params.get('accepted_values_per_sample') and
+                     index in f.other_params.get('accepted_values_per_sample') and
+                     value not in f.other_params.get('accepted_values_per_sample').get(index))
+            ):
+                st.session_state.unacceptable_response = True
+        if f.mandatory or st.session_state.following_mandatory:
             f.title += " :red[* required!]" if (validation_error and not SHOW_VALIDATION_ERROR_MESSAGE) else' :red[*]'
         f.help = None
         match f.type:
             case 'checkbox':
                 st.checkbox(f.title,
                 st.text_area(f.title, key=key, value=value, max_chars=None)
         if validation_error:
+            st.session_state.unacceptable_response = False
             st.error(f"Mandatory field")
     submitted = st.form_submit_button("Submit")
     if submitted:
+        if 'unacceptable_response' in st.session_state and st.session_state.unacceptable_response:
+            prep_and_save_data(index, ('skip' in st.session_state and st.session_state['skip']))
+            st.rerun()
+        skip_sample = ('skip' in st.session_state and st.session_state['skip'])
+        if not skip_sample and not st.session_state.valid:
             st.error("Please fill in all mandatory fields")
             # st.rerun() # filed-out values are not shown otherwise
         else:
             with st.spinner(text="saving"):
+                prep_and_save_data(index, skip_sample)
             st.success("Feedback submitted successfully!")
             navigate(1)
     st.session_state.form_displayed = st.session_state.current_index
+def prep_and_save_data(index, skip_sample):
+    """Assemble the record for the current form and hand it to save_data.
+
+    The record holds the user id, the current index, the COLS_TO_SAVE columns
+    of row ``index`` (only when ``index`` is non-negative), the value of every
+    collected input widget (stored in session state under name + str(index)),
+    and the ``skip`` flag.
+    """
+    record = {
+        'user_id': st.session_state.user_id,
+        'index': st.session_state.current_index,
+    }
+    if index >= 0:
+        record.update(st.session_state.data.iloc[index][COLS_TO_SAVE].to_dict())
+    for input_name in st.session_state.data_inputs_keys:
+        record[input_name] = st.session_state[input_name + str(index)]
+    record['skip'] = skip_sample
+    save_data(record)
+# st.set_page_config(layout='wide')
 # Title of the app
 st.title("Moderator Intervention Prediction")
 div[data-testid="stMarkdownContainer"] > p {
     font-size: 1rem;
 }
+    section.main > div {max-width:60rem}
     </style>
     """, unsafe_allow_html=True)
+def add_annotation_guidelines():
+    """Show the current user id and an opened, collapsible guidelines section."""
+    st.write(f"username is {st.session_state.user_id}")
+    guidelines_html = (
+        "<details open><summary><b>Annotation Guidelines</b></summary>"
+        + guidelines_text
+        + "</details><br>"
+    )
+    st.markdown(guidelines_html, unsafe_allow_html=True)
+# Stop rendering for participants flagged as ineligible. The contact sentence
+# is appended only once the participant has reached a non-negative index; it
+# starts with a space so the two sentences do not run together.
+if 'unacceptable_response' in st.session_state and st.session_state.unacceptable_response:
+    add_annotation_guidelines()
+    st.error("You are not eligible for this study. Thank you for your time!" +
+             ("" if st.session_state.current_index < 0 else
+              #" You will receive a small compensation as explained in the guidelines. "
+              " Please email eva-maria.vecchi@ims.uni-stuttgart.de for issues or questions."
+             ))
+    st.stop()
 # Load the data to annotate
 if 'data' not in st.session_state:
+    st.session_state.data = read_data()
+# user id: taken from the "user_id" URL query parameter when it is present
+user_id_from_url = get_param_from_url("user_id")
+if user_id_from_url:
+    st.session_state.user_id = user_id_from_url
+# current index: initialised once per session from the stored progress
+# NOTE(review): get_start_index() reads the session's user id -- confirm the
+# behaviour when no user_id URL parameter was supplied.
 if 'current_index' not in st.session_state:
+    start_index = get_start_index()
+    st.session_state.current_index = start_index
     st.session_state.form_displayed = -3
+# Extra fields are appended when the "show_extra_fields" URL parameter is non-empty
+if get_param_from_url('show_extra_fields'):
+    fields += url_conditional_fields
 def add_validated_submit(fields, message):
     st.session_state.form_displayed = st.session_state.current_index
         else:
             navigate(1)
 def add_checked_submit():
+    """Render the 'I agree' checkbox and pass its value to add_validated_submit
+    together with the consent prompt message."""
+    consent_given = st.checkbox('I agree', key='consent')
+    add_validated_submit([consent_given], "Please agree to give your consent to proceed")
 if st.session_state.current_index == -3:
     with st.form("data_form"):
         st.markdown(consent_text)
         add_checked_submit()
 elif st.session_state.current_index == -2:
+    if st.session_state.get('user_id'):
         navigate(1)
     else:
         with st.form("data_form"):