import json
import os
from dataclasses import dataclass, field
from typing import List, Optional, Dict
from PIL import Image
import pandas as pd
import streamlit as st
from huggingface_hub import HfFileSystem
@dataclass
class Field:
    """Declarative description of one element of the annotation form.

    Instances are listed in the module-level ``fields`` configuration and
    rendered by ``show_field``, which dispatches on ``type``.
    """

    # Element kind that ``show_field`` dispatches on (e.g. "input_col",
    # "container", "radio", "multiselect").
    type: str
    # Label / markdown text shown for the element.
    title: str
    # Input-data column to display, or key under which the answer is stored.
    # Grouping elements (expander/container) leave it unset.
    name: Optional[str] = None
    # Optional help text for input widgets.
    help: Optional[str] = None
    # Nested fields, used by grouping elements only.
    children: Optional[List['Field']] = None
    # Per-field overrides read by ``show_field`` ('labels', 'choices').
    other_params: Optional[Dict[str, object]] = field(default_factory=dict)

    def __post_init__(self):
        # ``show_field`` calls ``other_params.get(...)`` unconditionally, so an
        # explicit ``other_params=None`` is normalised to an empty dict.
        if self.other_params is None:
            self.other_params = {}
def get_user_id_from_url():
    """Return the ``user_id`` query parameter of the page URL.

    Falls back to an empty string when the parameter is absent.
    """
    return st.query_params.get("user_id", "")
# Write token for the Hugging Face Hub, taken from the environment
# (None when the variable is not set).
HF_TOKEN = os.getenv("HF_TOKEN_WRITE")
# Startup diagnostic: reports only whether a token was found, never its value.
print("is none?", HF_TOKEN is None)
# Filesystem-style client used for every remote read and write in this file.
hf_fs = HfFileSystem(token=HF_TOKEN)
# Hub repository holding the input CSV/images and receiving the saved answers.
input_repo_path = 'datasets/emvecchi/annotate-pilot'
output_repo_path = 'datasets/emvecchi/annotate-pilot'
to_annotate_file_name = 'to_annotate.csv'  # CSV file to annotate
# Input columns copied into every saved annotation record.
COLS_TO_SAVE = ['comment_id']

# Five-point label scales for select sliders; position i labels slider value i * 25.
# Fixed user-facing typo: "neither agree no disagree" -> "neither agree nor disagree".
agreement_labels = ['strongly disagree', 'disagree', 'neither agree nor disagree', 'agree', 'strongly agree']
quality_labels = ['very poor', 'poor', 'acceptable', 'good', 'very good']
priority_labels = ['not a priority', 'low priority', 'neutral', 'moderate priority', 'high priority']
# Scale used when a select_slider field does not supply its own 'labels'.
default_labels = agreement_labels
# Options offered by the "moderation function" multiselect.
function_choices = [
    'Broadening Discussion',
    'Improving Comment Quality',
    'Content Correction',
    'Keeping Discussion on Topic',
    'Organizing Discussion',
    'Policing',
    'Resolving Site Use Issues',
    'Social Functions',
    'Other (please specify)',
]

# Options offered by the "contributing properties" multiselect.
property_choices = [
    'appropriateness',
    'clarity',
    'constructiveness',
    'common good',
    'effectiveness',
    'emotion',
    'impact',
    'overall quality',
    'proposal',
    'Q for justification',
    'storytelling',
    'rationality',
    'reasonableness',
    'reciprocity',
    'reference',
    'Other (please specify)',
]

# Options offered by the "moderator assistance" multiselect.
assistance_choices = [
    'Expand the breadth of moderator role',
    'Reduce my own bias',
    # 'Assist with recall',
    'Avoids missing relevant instances',
    'Improve speed of moderation tasks',
    'Manage prioritization of comments to consider',
    'Visualization of properties narrows down moderator contribution',
    'Other (please specify)',
]

# Option list used when a multiselect field does not supply its own 'choices'.
default_choices = function_choices
# Form layout, rendered top to bottom by ``show_field``.
# "input_col" entries display a column of the current input row; the
# containers group the widgets whose answers are saved under each ``name``.
# User-facing typos fixed: "Preceeding" -> "Preceding" and
# "Do feel this comment" -> "Do you feel this comment".
fields: List[Field] = [
    Field(name="topic", type="input_col", title="**Topic:**"),
    Field(type="expander", title="**Preceding Comment:** *(expand)*", children=[
        Field(name="parent_comment", type="input_col", title=""),
    ]),
    #Field(name="parent_comment", type="input_col", title="**Preceding Comment:**"),
    Field(name="comment", type="input_col", title="**Comment:**"),
    Field(name="image_name", type="input_col", title="**Visualization of high contributing properties:**"),
    Field(type="container", title="**Need for Moderation**", children=[
        Field(name="to_moderate", type="radio",
              title="Do you feel this comment/discussion would benefit from moderator intervention?"),
        Field(name="actions_clear", type="select_slider",
              title="With what level of **priority** would you need to interact with this comment?",
              other_params={'labels': priority_labels}),
    ]),
    Field(type="container", title="**Moderation Function**", children=[
        # No 'choices' override: the multiselect falls back to default_choices.
        Field(name="mod_function", type="multiselect",
              title="What type of moderation function is needed here? *(Multiple selection possible)*"),
        Field(name="mod_function_other", type="text", title="*If Other, please specify:*"),
    ]),
    Field(type="container", title="**Contributing properties**", children=[
        Field(name="relevant_properties", type="multiselect",
              title="Which property(s) is most impactful in your assessment? *(Multiple selection possible)*",
              other_params={'choices': property_choices}),
        Field(name="relevant_properties_other", type="text", title="*If Other, please specify:*"),
    ]),
    Field(type="container", title="**Moderator Assistance**", children=[
        Field(name="helpful", type="radio",
              title="If this comment/discussion was flagged to you, would it be helpful in your task of moderation?"),
        Field(name="mod_assistance", type="multiselect",
              title="If yes, please motivate the benefit it would contribute to the task. *(Multiple selection possible)*",
              other_params={'choices': assistance_choices}),
        Field(name="mod_assistance_other", type="text", title="*If Other, please specify:*"),
    ]),
    Field(type="container", title="**Other**", children=[
        Field(name="other_comments", type="text",
              title="Please provide any additional comments or information: *(optional)*"),
    ]),
]
# Initial value per input-widget type. Membership in this mapping is also how
# ``show_field`` tells input widgets apart from display-only elements.
INPUT_FIELD_DEFAULT_VALUES = {
    'slider': 0,
    'text': None,
    'textarea': None,
    'checkbox': False,
    'radio': None,
    'select_slider': 50,
    'multiselect': None,
}

# When False, ``show_field`` folds a field's help text into its title and
# passes no ``help`` to the widget.
SHOW_HELP_ICON = False
def read_data(_path):
    """Read the CSV at ``_path`` inside the input repository.

    Returns the parsed content as a pandas DataFrame.
    """
    remote_file = '/'.join((input_repo_path, _path))
    with hf_fs.open(remote_file) as handle:
        frame = pd.read_csv(handle)
    return frame
def read_saved_data():
    """Return the saved annotation for the current user and row.

    The location comes from ``get_path()``. Returns ``None`` when no file
    exists there or when its content is not valid JSON (the decode error is
    printed).
    """
    target = '/'.join((output_repo_path, get_path()))
    if not hf_fs.exists(target):
        return None
    with hf_fs.open(target) as handle:
        try:
            return json.load(handle)
        except json.JSONDecodeError as err:
            print(err)
    return None
def save_data(data):
    """Write ``data`` as JSON to the output repository.

    The target directory is derived from ``data['user_id']`` and the file
    name from ``get_path()``.
    """
    user_dir = f"{output_repo_path}/{data['user_id']}"
    hf_fs.mkdir(user_dir)
    destination = f"{output_repo_path}/{get_path()}"
    with hf_fs.open(destination, "w") as out:
        out.write(json.dumps(data))
def get_path():
    """Return ``<user_id>/<current_index>.json`` built from the session state."""
    state = st.session_state
    user, row = state.user_id, state.current_index
    return f"{user}/{row}.json"
def display_image(image_path):
    """Open the remote image at ``image_path`` and show it on the page."""
    with hf_fs.open(image_path) as image_file:
        picture = Image.open(image_file)
        # Rendered while the remote file is still open.
        st.image(
            picture,
            caption='8 most contributing properties',
            use_column_width=True,
        )
#################################### Streamlit App ####################################
def navigate(index_change):
    """Move the current row index by ``index_change`` and rerun the script."""
    new_index = st.session_state.current_index + index_change
    st.session_state.current_index = new_index
    print(new_index)
    # Rerun at once so the new index is rendered without a second click:
    # https://discuss.streamlit.io/t/click-twice-on-button-for-changing-state/45633/2
    st.rerun()
def show_field(f: Field, index: int):
if f.type not in INPUT_FIELD_DEFAULT_VALUES.keys():
match f.type:
case 'input_col':
st.write(f.title)
if f.name == 'image_name':
st.write(f.title)
image_name = st.session_state.data.iloc[index][f.name]
if image_name: # Ensure the image name is not empty
image_path = os.path.join(input_repo_path, 'images', image_name)
display_image(image_path)
else:
st.write(st.session_state.data.iloc[index][f.name])
case 'markdown':
st.markdown(f.title)
case 'expander' | 'container':
with (st.expander(f.title) if f.type == 'expander' else st.container(border=True)):
if f.type == 'container':
st.markdown(f.title)
for child in f.children:
show_field(child, index)
else:
key = f.name + str(index)
value = st.session_state.default_values[f.name] = data_collected[f.name] if data_collected else \
INPUT_FIELD_DEFAULT_VALUES[f.type]
if not SHOW_HELP_ICON:
f.title = f'**{f.title}**\n\n{f.help}' if f.help else f.title
f.help = None
match f.type:
case 'checkbox':
st.session_state.data_inputs[f.name] = st.checkbox(f.title,
key=key,
value=value, help=f.help)
case 'radio':
st.session_state.data_inputs[f.name] = st.radio(f.title,
["yes","no","other"],
key=key,
help=f.help)
case 'slider':
st.session_state.data_inputs[f.name] = st.slider(f.title,
min_value=0, max_value=6, step=1,
key=key,
value=value, help=f.help)
case 'select_slider':
labels = default_labels if not f.other_params.get('labels') else f.other_params.get('labels')
st.session_state.data_inputs[f.name] = st.select_slider(f.title,
options=[0, 25, 50, 75, 100],
format_func=lambda x: labels[x // 25],
key=key,
value=value, help=f.help)
case 'multiselect':
choices = default_choices if not f.other_params.get('choices') else f.other_params.get('choices')
st.session_state.data_inputs[f.name] = st.multiselect(f.title,
options = choices,
key=key, max_selections=3,
help=f.help)
case 'text':
st.session_state.data_inputs[f.name] = st.text_input(f.title, key=key, value=value)
case 'textarea':
st.session_state.data_inputs[f.name] = st.text_area(f.title, key=key, value=value)
# Page chrome: wide layout, app title and a collapsed guidelines panel.
st.set_page_config(layout='wide')
st.title("Moderator Intervention Prediction")
st.markdown(
"""
""", unsafe_allow_html=True)
guidelines_panel = st.expander(label="Annotation Guidelines", expanded=False)
with guidelines_panel:
    st.write('some guidelines here')
# The CSV to annotate is fetched once and then kept in the session state.
if 'data' not in st.session_state:
    st.session_state['data'] = read_data(to_annotate_file_name)
# The row index starts at -1, which selects the user-ID step below.
if 'current_index' not in st.session_state:
    st.session_state['current_index'] = -1
if st.session_state.current_index == -1:
    # Identification step: take the user ID from the URL when present,
    # otherwise ask for it before moving to the first row.
    user_id_from_url = get_user_id_from_url()
    if user_id_from_url:
        st.session_state.user_id = user_id_from_url
        navigate(1)
    else:
        st.session_state.user_id = st.text_input('Please enter your user ID to proceed', value=user_id_from_url)
        if st.button("Next"):
            # The user ID is the directory that answers are saved under, so a
            # blank one is refused instead of producing a path like "/0.json".
            if st.session_state.user_id.strip():
                navigate(1)
            else:
                st.warning("Please enter a user ID before continuing.")
elif st.session_state.current_index < len(st.session_state.data):
    st.write(f"username is {st.session_state.user_id}")
    # Creating the form
    with st.form("feedback_form"):
        index = st.session_state.current_index
        # Module-level on purpose: show_field reads ``data_collected`` to
        # pre-fill widgets with previously saved answers.
        data_collected = read_saved_data()
        st.session_state.default_values = {}
        st.session_state.data_inputs = {}
        # Loop variable is not called ``field`` so it does not shadow
        # ``dataclasses.field`` imported at the top of the file.
        for top_field in fields:
            # Grouping fields have name=None; seeding data_inputs[None] would
            # be serialised as a stray "null" key in the saved JSON.
            if top_field.name is not None and top_field.name not in st.session_state.data.columns:
                # Field doesn't exist in input dataframe, add it with a default value
                st.session_state.data_inputs[top_field.name] = None
            show_field(top_field, index)
        submitted = st.form_submit_button("Submit")
        if submitted:
            with st.spinner(text="saving"):
                save_data({
                    'user_id': st.session_state.user_id,
                    'index': st.session_state.current_index,
                    **st.session_state.data.iloc[index][COLS_TO_SAVE].to_dict(),
                    **st.session_state.data_inputs
                })
                st.success("Feedback submitted successfully!")
                navigate(1)
else:
    st.write("Finished all data points!")
# Navigation: a "Previous" button from the second row onwards, plus a page
# indicator while a row is being shown.
row_count = len(st.session_state.data)
position = st.session_state.current_index
if position > 0 and st.button("Previous"):
    with st.spinner(text="in progress"):
        navigate(-1)
if 0 <= position < row_count:
    st.write(f"Page {position + 1} out of {row_count}")
# disable text input enter to submit
# https://discuss.streamlit.io/t/text-input-how-to-disable-press-enter-to-apply/14457/6
# NOTE(review): both HTML payloads below are empty in this copy of the file
# (a lone newline and an empty string), so as written these calls inject
# nothing. Presumably the script/style markup from the linked thread was lost;
# confirm against the deployed version before relying on this behaviour.
import streamlit.components.v1 as components
# Zero-height HTML component, intended to carry the Enter-key handling.
components.html(
"""
""",
height=0
)
# Raw-HTML markdown block; its content is empty here.
st.markdown(
"""""", unsafe_allow_html=True
)