"""Streamlit app that shows random samples of instruction-model outputs.

The script loads the ``HuggingFaceH4/instruction-pilot-outputs-filtered``
dataset and, when the button is pressed, renders a handful of randomly chosen
prompts together with a table of the filtered outputs stored for each prompt.
"""
import os
from pathlib import Path

import pandas as pd
import streamlit as st
from datasets import load_dataset
from dotenv import load_dotenv

# Number of random prompts rendered per button click.
NUM_SAMPLES = 5

# CSS that hides the index column of tables rendered with ``st.table``.
# The string in the previous revision contained only whitespace, so nothing
# was actually hidden.
# NOTE(review): these selectors depend on the table markup emitted by the
# installed Streamlit version -- confirm they still match.
HIDE_TABLE_ROW_INDEX_CSS = """
<style>
thead tr th:first-child {display:none}
tbody th {display:none}
</style>
"""

# Pick up a local .env file (e.g. one defining HF_TOKEN) when present.
if Path(".env").is_file():
    load_dotenv(".env")

st.set_page_config(layout="wide")

HF_TOKEN = os.getenv("HF_TOKEN")

# NOTE(review): newer `datasets` releases replace `use_auth_token` with
# `token`; confirm against the pinned version before switching.
# Streamlit re-executes this script on every interaction, so this call runs
# again on each button press.
ds = load_dataset(
    "HuggingFaceH4/instruction-pilot-outputs-filtered",
    split="train",
    use_auth_token=HF_TOKEN,
)

st.markdown("# Instruction Model Outputs")
st.markdown(
    "This app shows the outputs of various open-souce, instruction-trained "
    "models from a "
    "[dataset](https://huggingface.co/datasets/HuggingFaceH4/instruction-pilot-outputs-filtered) "
    "of human demonstrations filtered for overlap with the original prompt "
    "and canned responses. Hit the button below to view a few random samples "
    "from the generated outputs."
)
# Each bullet sits on its own line so Markdown renders them as a list.
st.markdown(
    """**Notes**

* Some outputs contain a `Human:` prefix - this is likely due to the fact each model was prompted to be a dialogue agent.
* The outputs were generated deterministically with `temperature=0` and `max_new_tokens=100`
"""
)

button = st.button("Show me what you got!")

if button:
    # The stylesheet applies page-wide, so inject it once rather than once
    # per rendered sample.
    st.markdown(HIDE_TABLE_ROW_INDEX_CSS, unsafe_allow_html=True)

    # Clamp the sample size so a dataset with fewer rows than NUM_SAMPLES
    # does not make `select` fail on out-of-range indices.
    sample_count = min(NUM_SAMPLES, len(ds))
    sample_ds = ds.shuffle().select(range(sample_count))

    for sample in sample_ds:
        st.markdown(f'**Prompt:** {sample["prompt"]}')
        # One table per prompt, built from the records in `filtered_outputs`.
        df = pd.DataFrame.from_records(sample["filtered_outputs"])
        st.table(df)