Spaces:

huggingface
/

text-data-filtering

Running

text-data-filtering / app.py

test

f622ed0 almost 3 years ago

No virus

640 Bytes

	import streamlit as st
	import datasets
	from functools import partial


	# Load the local JSON-lines sample file and keep only rows 0..99 of its
	# "train" split so the rendered table stays small.
	data = datasets.load_dataset("json", data_files="small_test_data.jsonl")["train"].select(range(100))

	# Cutoff sliders read by the filter_* functions below.
	# The bounds are floats on purpose: with integer bounds (0, 1) Streamlit
	# builds an integer slider whose only possible values are 0 and 1, which
	# cannot express a fractional threshold. Each slider still starts at its
	# minimum (0.0), as before.
	bad_cutoff = st.slider('Bad words cutoff', 0.0, 1.0)
	stp_cutoff = st.slider('Stop words cutoff', 0.0, 1.0)
	# NOTE(review): the 0-1 range is kept from the original; confirm that the
	# "ppl" column is actually on this scale.
	ppl_cutoff = st.slider('ppl cutoff', 0.0, 1.0)


	def filter_ppl(examples, invert=False):
	    """Return one boolean per example in a batch, based on its ``ppl`` value.

	    An entry is True when the example's ``ppl`` is strictly below the
	    module-level ``ppl_cutoff`` slider value.

	    Args:
	        examples: Batch indexed by column name; ``examples["ppl"]`` must be
	            an iterable of values comparable with ``ppl_cutoff``.
	            Presumably the batch dict passed by
	            ``datasets.Dataset.filter(..., batched=True)`` - confirm at the
	            call site.
	        invert: When True, return the complement of the mask (True for
	            values that are *not* below the cutoff).

	    Returns:
	        A list of bools, one per entry of ``examples["ppl"]``.
	    """
	    # `flag != invert` flips the comparison result only when invert is True.
	    return [(ppl < ppl_cutoff) != invert for ppl in examples["ppl"]]

	def filter_bad(examples, invert=False):
	    """Return one boolean per example in a batch, based on its ``bad_words`` value.

	    An entry is True when the example's ``bad_words`` value is strictly
	    below the module-level ``bad_cutoff`` slider value.

	    Args:
	        examples: Batch indexed by column name; ``examples["bad_words"]``
	            must be an iterable of values comparable with ``bad_cutoff``.
	            Presumably the batch dict passed by
	            ``datasets.Dataset.filter(..., batched=True)`` - confirm at the
	            call site.
	        invert: When True, return the complement of the mask (True for
	            values that are *not* below the cutoff).

	    Returns:
	        A list of bools, one per entry of ``examples["bad_words"]``.
	    """
	    # `flag != invert` flips the comparison result only when invert is True.
	    return [(bad < bad_cutoff) != invert for bad in examples["bad_words"]]

	def filter_stp(examples, invert=False):
	    """Return one boolean per example in a batch, based on its ``stop_words`` value.

	    An entry is True when the example's ``stop_words`` value is strictly
	    above the module-level ``stp_cutoff`` slider value. Note the direction
	    is the opposite of the ``ppl`` and ``bad_words`` filters, which test
	    for values below their cutoff.

	    Args:
	        examples: Batch indexed by column name; ``examples["stop_words"]``
	            must be an iterable of values comparable with ``stp_cutoff``.
	            Presumably the batch dict passed by
	            ``datasets.Dataset.filter(..., batched=True)`` - confirm at the
	            call site.
	        invert: When True, return the complement of the mask (True for
	            values that are *not* above the cutoff).

	    Returns:
	        A list of bools, one per entry of ``examples["stop_words"]``.
	    """
	    # `flag != invert` flips the comparison result only when invert is True.
	    return [(stp > stp_cutoff) != invert for stp in examples["stop_words"]]


	# Render the loaded rows as a static table.
	# NOTE(review): none of the filter_* functions is applied in the visible
	# code, so this shows `data` exactly as loaded, regardless of the sliders.
	st.table(data=data)