xhluca
Set default recording name
d45fbdd
from datetime import datetime
import json
import os
import time
from pathlib import Path
import streamlit as st
from utils import (
load_json,
load_json_no_cache,
parse_arguments,
format_chat_message,
find_screenshot,
gather_chat_history,
get_screenshot,
load_page,
)
def show_selectbox(demonstration_dir):
# find all the subdirectories in the current directory
dirs = [
d
for d in os.listdir(demonstration_dir)
if os.path.isdir(f"{demonstration_dir}/{d}")
]
if not dirs:
st.title("No recordings found.")
return None
# sort by date
dirs.sort(key=lambda x: os.path.getmtime(f"{demonstration_dir}/{x}"), reverse=True)
# offer the user a dropdown to select which recording to visualize, set a default
recording_name = st.sidebar.selectbox("Recording", dirs, index=0)
return recording_name
def show_overview(data, recording_name, basedir):
st.title('[WebLINX](https://mcgill-nlp.github.io/weblinx) Explorer')
st.header(f"Recording: `{recording_name}`")
screenshot_size = st.session_state.get("screenshot_size_view_mode", "regular")
show_advanced_info = st.session_state.get("show_advanced_information", False)
if screenshot_size == "regular":
col_layout = [1.5, 1.5, 7, 3.5]
elif screenshot_size == "small":
col_layout = [1.5, 1.5, 7, 2]
else: # screenshot_size == 'large'
col_layout = [1.5, 1.5, 11]
# col_i, col_time, col_act, col_actvis = st.columns(col_layout)
# screenshots = load_screenshots(data, basedir)
for i, d in enumerate(data):
if i > 0 and show_advanced_info:
# Use html to add a horizontal line with minimal gap
st.markdown(
"<hr style='margin-top: 0.1rem; margin-bottom: 0.1rem;'/>",
unsafe_allow_html=True,
)
if screenshot_size == "large":
col_time, col_i, col_act = st.columns(col_layout)
col_actvis = col_act
else:
col_time, col_i, col_act, col_actvis = st.columns(col_layout)
secs_from_start = d["timestamp"] - data[0]["timestamp"]
# `secs_from_start` is a float including ms, display in MM:SS.mm format
col_time.markdown(
f"**{datetime.utcfromtimestamp(secs_from_start).strftime('%M:%S')}**"
)
if not st.session_state.get("enable_html_download", True):
col_i.markdown(f"**#{i}**")
elif d["type"] == "browser" and (page_filename := d["state"]["page"]):
page_path = f"{basedir}/pages/{page_filename}"
col_i.download_button(
label="#" + str(i),
data=load_page(page_path),
file_name=recording_name + "-" + page_filename,
mime="multipart/related",
key=f"page{i}",
)
else:
col_i.button(f"#{i}", type='secondary')
if d["type"] == "chat":
col_act.markdown(format_chat_message(d), unsafe_allow_html=True)
continue
# screenshot_filename = d["state"]["screenshot"]
img = get_screenshot(d, basedir)
arguments = parse_arguments(d["action"])
event_type = d["action"]["intent"]
action_str = f"**{event_type}**({arguments})"
if img:
col_actvis.image(img)
col_act.markdown(action_str)
if show_advanced_info:
status = d["state"].get("screenshot_status", "unknown")
text = ""
if status == "good":
text += f'**:green[Used in demo]**\n\n'
text += f'Screenshot: `{d["state"]["screenshot"]}`\\\n'
text += f'Page: `{d["state"]["page"]}`\n'
col_act.markdown(text)
def load_recording(basedir):
# Before loading replay, we need a dropdown that allows us to select replay.json or replay_orig.json
# Find all files in basedir starting with "replay" and ending with ".json"
replay_files = sorted(
[
f
for f in os.listdir(basedir)
if f.startswith("replay") and f.endswith(".json")
]
)
replay_file = st.sidebar.selectbox("Select replay", replay_files, index=0)
st.sidebar.checkbox(
"Advanced Screenshot Info", False, key="show_advanced_information"
)
st.sidebar.checkbox(
"Enable HTML download", False, key="enable_html_download"
)
replay_file = replay_file.replace(".json", "")
if not Path(basedir).joinpath('metadata.json').exists():
st.error(f"Metadata file not found at {basedir}/metadata.json. This is likely an issue with Huggingface Spaces. Try cloning this repo and running locally.")
st.stop()
metadata = load_json_no_cache(basedir, "metadata")
# convert timestamp to readable date string
recording_start_timestamp = metadata["recordingStart"]
recording_start_date = datetime.fromtimestamp(
int(recording_start_timestamp) / 1000
).strftime("%Y-%m-%d %H:%M:%S")
st.sidebar.markdown(f"**started**: {recording_start_date}")
# recording_end_timestamp = k["recordingEnd"]
# calculate duration
# duration = int(recording_end_timestamp) - int(recording_start_timestamp)
# duration = time.strftime("%M:%S", time.gmtime(duration / 1000))
# Read in the JSON data
replay_dict = load_json_no_cache(basedir, replay_file)
form = load_json_no_cache(basedir, "form")
if replay_dict is None:
st.error(f"Replay file not found at {basedir}/{replay_file}. This is likely an issue with Huggingface Spaces. Try cloning this repo and running locally.")
st.stop()
if form is None:
st.error(f"Form file not found at {basedir}/form.json. This is likely an issue with Huggingface Spaces. Try cloning this repo and running locally.")
st.stop()
duration = replay_dict["data"][-1]["timestamp"] - replay_dict["data"][0]["timestamp"]
duration = time.strftime("%M:%S", time.gmtime(duration))
st.sidebar.markdown(f"**duration**: {duration}")
if not replay_dict:
return None
for key in [
"annotator",
"description",
"tasks",
"upload_date",
"instructor_sees_screen",
"uses_ai_generated_output",
]:
if form and key in form:
# Normalize the key to be more human-readable
key_name = key.replace("_", " ").title()
if type(form[key]) == list:
st.sidebar.markdown(f"**{key_name}**: {', '.join(form[key])}")
else:
st.sidebar.markdown(f"**{key_name}**: {form[key]}")
st.sidebar.markdown("---")
if replay_dict and "status" in replay_dict:
st.sidebar.markdown(f"**Validation status**: {replay_dict['status']}")
processed_meta_path = Path(basedir).joinpath('processed_metadata.json')
start_frame = 'file not found'
if processed_meta_path.exists():
with open(processed_meta_path) as f:
processed_meta = json.load(f)
start_frame = processed_meta.get('start_frame', 'info not in file')
st.sidebar.markdown(f"**Recording start frame**: {start_frame}")
# st.sidebar.button("Delete recording", type="primary", on_click=delete_recording, args=[basedir])
data = replay_dict["data"]
return data
def run():
# mode = st.sidebar.radio("Mode", ["Overview"])
demonstration_dir = "./demonstrations"
# # params = st.experimental_get_query_params()
# params = st.query_params
# print(params)
# # list demonstrations/
# demo_names = os.listdir(demonstration_dir)
# if params.get("recording"):
# if isinstance(params["recording"], list):
# recording_name = params["recording"][0]
# else:
# recording_name = params["recording"]
# else:
# recording_name = demo_names[0]
# recording_name = st.sidebar.selectbox(
# "Recordings",
# demo_names,
# index=demo_names.index(recording_name),
# )
# if recording_name != params.get("recording", [None])[0]:
# # st.experimental_set_query_params(recording=recording_name)
# # use st.query_params as a dict instead
# st.query_params['recording'] = recording_name
demo_names = os.listdir(demonstration_dir)
def update_recording_name():
st.query_params["recording"] = st.session_state.get("recording_name", demo_names[0])
# For initial run, set the query parameter to the selected recording
if not st.query_params.get("recording"):
update_recording_name()
recording_name = st.query_params.get("recording")
if recording_name not in demo_names:
st.error(f"Recording `{recording_name}` not found. Please select another recording.")
st.stop()
recording_idx = demo_names.index(recording_name)
st.sidebar.selectbox(
"Recordings", demo_names, on_change=update_recording_name, key="recording_name", index=recording_idx
)
with st.sidebar:
# Want a dropdown
st.selectbox(
"Screenshot size",
["small", "regular", "large"],
index=1,
key="screenshot_size_view_mode",
)
if recording_name is not None:
basedir = f"{demonstration_dir}/{recording_name}"
data = load_recording(basedir=basedir)
if not data:
st.stop()
show_overview(data, recording_name=recording_name, basedir=basedir)
if __name__ == "__main__":
st.set_page_config(layout="wide")
run()