Spaces:
Sleeping
Sleeping
File size: 1,381 Bytes
b98062a d7f4671 abce496 b98062a d7f4671 e064c00 b98062a 963ddcb abce496 b98062a abce496 b98062a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# import module
import streamlit as st
import datasets
import pandas as pd
# Placeholder values for the settings that load() reads as module globals.
# The three text settings start out empty; the skip offset starts at zero.
access_token = dataset = split = ""
skip = 0
def load(page_size=50):
    """Load one page of the selected dataset and render it in the app.

    Reads the module-level ``dataset``, ``split``, ``skip`` and
    ``access_token`` values. For every example in the requested window it
    shows a small table with the non-English and English text, followed by
    an audio player for the example's recording.

    Args:
        page_size: Number of examples to fetch, starting at ``skip``.
            Defaults to 50.
    """
    # Column layout per supported dataset:
    # (audio column, English text column, other-language text column).
    columns_by_dataset = {
        "nlewins/onetalk_questions_full_audio": (
            "audio_transcription",
            "en",
            "transcription",
        ),
        "nlewins/LSK_full_with_audio": (
            "audio_transcription",
            "en",
            "transcription",
        ),
    }

    columns = columns_by_dataset.get(dataset)
    if columns is None:
        # Report unknown datasets up front instead of failing with an
        # unbound-variable error after the download has already happened.
        st.error(f"Unsupported dataset: {dataset!r}")
        return
    column_with_audio, column_with_english_text, column_with_other_text = columns

    # Prefer the token typed by the user; otherwise fall back to the app's
    # configured secret, and finally to anonymous access if no secret exists.
    if access_token:
        token = access_token
    else:
        try:
            token = st.secrets["hf_token"]
        except (KeyError, FileNotFoundError):
            token = None

    # Honour the split chosen in the UI; "test" remains the default.
    split_name = split or "test"
    start = max(int(skip), 0)

    ds = datasets.load_dataset(
        dataset,
        token=token,
        split=datasets.ReadInstruction(
            split_name,
            from_=start,
            to=start + page_size,
            unit="abs",
        ),
    )

    for example in ds:
        # One row per language: other-language text first, English second.
        df = pd.DataFrame(
            [example[column_with_other_text], example[column_with_english_text]]
        )
        st.table(df.values)

        audio = example[column_with_audio]
        st.audio(audio["array"], sample_rate=audio["sampling_rate"])
# Title
st.title("One Talk dataset explorer")

# Input widgets. Their current values are stored in the module-level names
# that load() reads.
access_token = st.text_input("Access token", value="", type="password")
dataset = st.text_input("Dataset", value="nlewins/LSK_full_with_audio")
split = st.text_input("Split", value="test")
skip = st.number_input("Skip", value=250)

# Pressing the button reruns the script, and the load() call below then picks
# up the current inputs. load is deliberately NOT registered as an on_click
# callback: a callback runs before the script body, so the results would be
# rendered above the title and the dataset would be fetched twice per click.
st.button("Go")
st.divider()
load()
|