|
import streamlit as st |
|
import datasets |
|
from functools import partial |
|
|
|
|
|
# Load the local JSONL file and keep at most its first 100 rows for display.
# The row count is clamped because Dataset.select raises IndexError when asked
# for indices beyond the end of the dataset (i.e. files with < 100 rows).
_train_split = datasets.load_dataset("json", data_files="small_test_data.jsonl")["train"]
data = _train_split.select(range(min(100, len(_train_split))))
|
|
|
# Threshold sliders read by the filter_* functions in this file.
# Float bounds are used on purpose: with integer bounds (0, 1) Streamlit builds
# an integer slider whose only possible values are 0 and 1, which makes it
# useless as a fractional cutoff. Each slider still starts at its minimum.
bad_cutoff = st.slider('Bad words cutoff', 0.0, 1.0)
stp_cutoff = st.slider('Stop words cutoff', 0.0, 1.0)
ppl_cutoff = st.slider('ppl cutoff', 0.0, 1.0)
|
|
|
|
|
def filter_ppl(examples, invert=False):
    """Build a boolean keep-mask for a batch from its "ppl" column.

    Args:
        examples: Mapping with a "ppl" entry holding one number per row.
        invert: When true, flip the mask so that rows which would normally
            be rejected are the ones marked True.

    Returns:
        A list of booleans, one per value in ``examples["ppl"]``. With
        ``invert=False`` an entry is True when the value is strictly below
        the module-level ``ppl_cutoff``.
    """
    flip = bool(invert)
    # Comparing the test result against `flip` negates it only when requested.
    return [(ppl < ppl_cutoff) != flip for ppl in examples["ppl"]]
|
|
|
def filter_bad(examples, invert=False):
    """Build a boolean keep-mask for a batch from its "bad_words" column.

    Args:
        examples: Mapping with a "bad_words" entry holding one number per row.
        invert: When true, flip the mask so that rows which would normally
            be rejected are the ones marked True.

    Returns:
        A list of booleans, one per value in ``examples["bad_words"]``. With
        ``invert=False`` an entry is True when the value is strictly below
        the module-level ``bad_cutoff``.
    """
    flip = bool(invert)
    # Comparing the test result against `flip` negates it only when requested.
    return [(bad < bad_cutoff) != flip for bad in examples["bad_words"]]
|
|
|
def filter_stp(examples, invert=False):
    """Build a boolean keep-mask for a batch from its "stop_words" column.

    Args:
        examples: Mapping with a "stop_words" entry holding one number per row.
        invert: When true, flip the mask so that rows which would normally
            be rejected are the ones marked True.

    Returns:
        A list of booleans, one per value in ``examples["stop_words"]``. With
        ``invert=False`` an entry is True when the value is strictly above
        the module-level ``stp_cutoff``.
    """
    flip = bool(invert)
    # Comparing the test result against `flip` negates it only when requested.
    return [(stp > stp_cutoff) != flip for stp in examples["stop_words"]]
|
|
|
|
|
# Render the loaded rows as a static table.
# NOTE(review): none of the filter_* functions are applied before this call in
# the visible code, so the sliders do not affect what is shown — confirm intent.
st.table(data=data)
|
|