File size: 1,686 Bytes
e542ede
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os
from pathlib import Path

import pandas as pd
import streamlit as st
from datasets import load_dataset
from dotenv import load_dotenv

# Pull variables from a local .env file when one exists in the working
# directory; otherwise the process environment is used as-is.
_dotenv_file = Path(".env")
if _dotenv_file.is_file():
    load_dotenv(".env")

st.set_page_config(layout="wide")

# Token forwarded to `load_dataset` below; None when HF_TOKEN is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# NOTE(review): this runs at module top level, so the dataset is requested
# each time the script executes — confirm whether caching is wanted here.
ds = load_dataset(
    "HuggingFaceH4/instruction-model-outputs-filtered",
    split="train",
    use_auth_token=HF_TOKEN,
)

# Page heading, followed by a one-paragraph description of the app that links
# to the dataset the outputs come from.
st.markdown("# Instruction Model Outputs")
st.markdown(
    """This app shows the outputs of various open-source, instruction-trained models from a [dataset](https://huggingface.co/datasets/HuggingFaceH4/instruction-model-outputs-filtered) of human demonstrations filtered for overlap with the original prompt and canned responses. Hit the button below to view a few random samples from the generated outputs."""
)
# Caveats about the generated outputs, rendered as a Markdown bullet list.
st.markdown(
    """**Notes**
* Some outputs contain a `Human:` prefix - this is likely due to the fact each model was prompted to be a dialogue agent.
* The outputs were generated deterministically with `temperature=0` and `max_new_tokens=100`
"""
)

# Upper bound on how many random samples are rendered per button press.
NUM_SAMPLES = 5

# CSS that hides the first header cell and the row-header cells of rendered
# tables, so the DataFrame index is not shown by `st.table`.
hide_table_row_index = """
                    <style>
                    thead tr th:first-child {display:none}
                    tbody th {display:none}
                    </style>
                    """

button = st.button("Show me what you got!")

if button:
    # The stylesheet is loop-invariant and applies to every table on the
    # page, so inject it once instead of once per sample.
    st.markdown(hide_table_row_index, unsafe_allow_html=True)

    # Clamp the count so `select` never requests more rows than the dataset
    # holds; the shuffle is unseeded, so each press shows different samples.
    sample_count = min(NUM_SAMPLES, len(ds))
    sample_ds = ds.shuffle().select(range(sample_count))

    for sample in sample_ds:
        st.markdown(f'**Prompt:** {sample["prompt"]}')

        # presumably `outputs` holds one record per model — verify against
        # the dataset schema; each record becomes one table row.
        st.table(pd.DataFrame.from_records(sample["outputs"]))