teven's picture
test
f622ed0
raw
history blame
No virus
640 Bytes
import streamlit as st
import datasets
from functools import partial
# Load the local JSON-lines file and keep only the first 100 rows of its
# "train" split.
data = datasets.load_dataset(
    "json", data_files="small_test_data.jsonl"
)["train"].select(range(100))

# Float bounds make each slider continuous over [0.0, 1.0]. With the integer
# bounds 0 and 1 Streamlit produces an integer slider (step 1), so the only
# selectable cutoffs were 0 and 1.
# NOTE(review): assumes the compared columns live on a 0-1 scale — confirm
# against the data, in particular for "ppl".
bad_cutoff = st.slider('Bad words cutoff', 0.0, 1.0)
stp_cutoff = st.slider('Stop words cutoff', 0.0, 1.0)
ppl_cutoff = st.slider('ppl cutoff', 0.0, 1.0)
def filter_ppl(examples, invert=False):
    """Build a boolean mask from the ``"ppl"`` column of a batch.

    Args:
        examples: Mapping whose ``"ppl"`` entry is an iterable of values
            comparable with the module-level ``ppl_cutoff``.
        invert: When true, negate the mask so that entries *not* strictly
            below the cutoff are selected instead.

    Returns:
        A list with one boolean per ``"ppl"`` entry: ``ppl < ppl_cutoff``,
        or its negation when ``invert`` is true.
    """
    # Normalise once so any truthy/falsy flag behaves like a real bool.
    flip = bool(invert)
    # ``ppl_cutoff`` is read from module scope (the slider value).
    return [(ppl < ppl_cutoff) != flip for ppl in examples["ppl"]]
def filter_bad(examples, invert=False):
    """Build a boolean mask from the ``"bad_words"`` column of a batch.

    Args:
        examples: Mapping whose ``"bad_words"`` entry is an iterable of
            values comparable with the module-level ``bad_cutoff``.
        invert: When true, negate the mask so that entries *not* strictly
            below the cutoff are selected instead.

    Returns:
        A list with one boolean per ``"bad_words"`` entry:
        ``bad < bad_cutoff``, or its negation when ``invert`` is true.
    """
    # Normalise once so any truthy/falsy flag behaves like a real bool.
    flip = bool(invert)
    # ``bad_cutoff`` is read from module scope (the slider value).
    return [(bad < bad_cutoff) != flip for bad in examples["bad_words"]]
def filter_stp(examples, invert=False):
    """Build a boolean mask from the ``"stop_words"`` column of a batch.

    Unlike the other cutoff predicates in this file, this one selects
    entries strictly *above* its cutoff.

    Args:
        examples: Mapping whose ``"stop_words"`` entry is an iterable of
            values comparable with the module-level ``stp_cutoff``.
        invert: When true, negate the mask so that entries *not* strictly
            above the cutoff are selected instead.

    Returns:
        A list with one boolean per ``"stop_words"`` entry:
        ``stp > stp_cutoff``, or its negation when ``invert`` is true.
    """
    # Normalise once so any truthy/falsy flag behaves like a real bool.
    flip = bool(invert)
    # ``stp_cutoff`` is read from module scope (the slider value).
    return [(stp > stp_cutoff) != flip for stp in examples["stop_words"]]
# Display `data` with Streamlit's table element.
# NOTE(review): nothing visible in this file applies the slider cutoffs or the
# filter_* predicates to `data`, so the rows are shown unfiltered — confirm
# whether a filtering step is still meant to be added before this call.
st.table(data)