"""Streamlit app for manually scoring model predictions stored in a JSONL file.

The user uploads a ``.jsonl`` file, picks a line number in the sidebar, sees
the input, gold label and predicted label for that line, and rates the
prediction on a 0-3 scale.  Ratings are kept in ``st.session_state["scores"]``
(keyed by line number) and summarised as an average and per-option counts.
"""

import json
from collections import Counter

import pandas as pd
import streamlit as st

uploaded_file = st.file_uploader("Choose a jsonl file prediction to upload", type=["jsonl"])
data = []

# Fallback gold labels, indexed by line number; used only when a record does
# not carry its own "label_processed" field.
labels = [
    'is against Hillary',
    'asks about a location',
    'is related to math and science',
    'asks for a quantity',
    'related to computer or internet',
    'involves a situation where people need clean water',
    'is grammatical',
    'is about world news',
    'insult women or immigrants',
    'expresses need for utility, energy or sanitation',
    'describes a situation that involves terrorism',
    'is about physics',
    'is about sports news',
    'asks about an entity',
    'is a more objective description of what happened',
    'describes a situation where people need shelter',
    'is related to a medical situation',
    'supports abortion',
    'is about math research',
    'is related to food security',
    'is related to business',
    'is against religion',
    'believes in god',
    'thinks the movie is good',
    'is about entertainment',
    'supports feminism',
    'is related to technology',
    'asks for an opinion',
    'asks about a person',
    'is against environmentalist',
    'is related to health',
    'contains subjective opinion',
    'involves crime',
    'is related to politics or government',
    'is pro-life',
    'is related to computer science',
    'is about statistics',
    'involves a search/rescue situation',
    'is related to infrastructure',
    'contains a bad movie review',
    'is ungrammatical',
    'asks for factual information',
    'is about family and relationships',
    'asks about an abbreviation',
    'is environmentalist',
    'contains offensive content',
    'involves a need for people to evacuate',
    'is related to sports',
    'is offensive to women',
    'contains a definition',
    'describes a regime change',
    'contains irony',
    'supports hillary',
    'is a spam',
]

if uploaded_file:
    data = uploaded_file.readlines()


def convert(tup):
    """Format a two-item sequence as ``"<first>: <second>"``.

    The second item is rendered with two decimal places, so it must be a
    number.
    """
    return f"{tup[0]}: {tup[1]:.2f}"


# The last valid index is len(data) - 1; allowing len(data) itself made
# data[i] raise IndexError.  max(..., 0) keeps the widget valid when no file
# has been uploaded yet.
i = st.sidebar.number_input(
    "Choose a line number",
    min_value=0,
    max_value=max(len(data) - 1, 0),
)
do_clear = st.sidebar.button("Clear eval stats")

if "scores" not in st.session_state or do_clear:
    st.session_state["scores"] = {}

# Radio-button caption -> numeric score.
options = {
    '(A), if s^ has mostly the same meaning as one of the human annotations s^*; e.g., "is related to sports" = "is about sports".': 3,
    '(B), if s^ is close but different; e.g., "is about sports team" ≈ "is about sports".': 2,
    '(C), if s^ is highly correlated but has different meaning; for example, "people needs shelter" is correlated with "there is an earthquake".': 1,
    '(D), if s^ is unrelated to s^*.': 0,
}

if not data:
    st.write("No data uploaded")
else:
    st.header("Input data")
    line = json.loads(data[i])

    # Records may use either of two key names for the input and the output.
    key_in = "input" if "input" in line else "context"
    key_out = "prediction_processed" if "prediction_processed" in line else "text"

    if isinstance(line[key_in], str):
        st.text(line[key_in])
    else:
        # Otherwise the input is iterated and each item is shown separately.
        for s in line[key_in]:
            st.text(s)

    st.header("Gold label")
    if "label_processed" in line:
        st.text(line["label_processed"])
    elif i < len(labels):
        st.text(labels[i])
    else:
        # The built-in label list is shorter than the uploaded file; show a
        # placeholder instead of crashing with IndexError.
        st.text("(no gold label available for this line)")

    st.header("Predicted label")
    st.text(line[key_out])

    # The score is stored unconditionally on every rerun, so viewing a line
    # records the currently selected option for it.
    scores = st.session_state["scores"]
    scores[i] = options[st.radio("Do you like model output? Choose a score", list(options))]

    count = len(scores)
    average = sum(scores.values()) / count if count > 0 else 0
    st.write(f"Average: {average:.2f}, Count = {count}")

    # Counter returns 0 for scores nobody has chosen yet.  A distinct loop
    # variable avoids clobbering the selected line index ``i``.
    counter = Counter(scores.values())
    for opt, score in options.items():
        st.write(f"{opt}: {counter[score]}")
    st.write(scores)

    if "top6_decoded" in line:
        st.header("Tokenwise top 6 probability breakdown")
        # Keep only entries with at least six items so every column below has
        # a value for every row.
        # presumably each entry maps a candidate token to its probability —
        # verify against the producer of the JSONL file.
        rows = [list(p.items()) for p in line["top6_decoded"] if len(p) >= 6]
        dataframe = pd.DataFrame({
            f"{rank}-th Prediction": [convert(row[rank]) for row in rows]
            for rank in range(6)
        })
        st.dataframe(dataframe)