"""Gradio app that builds a YouTube transcript dataset and uploads it to the Hub.

The UI collects a YouTube link, a target dataset repository name and a
Hugging Face write token, then delegates generation and upload to
``TranscriptDataset``.
"""

import gradio as gr

from dataset import TranscriptDataset
from downloader import WhisperPP, YoutubeDownloader
from interpreter import WhisperInterpreter

# Settings forwarded to the dataset generator.
model_size = "base"
mode = "transcribe"  # not read by the code in this module; kept for importers
write = False
download_path = "tmp/"


def dataset(url, name, token):
    """Generate a transcript dataset for ``url`` and upload it as ``name``.

    Args:
        url: YouTube link entered by the user.
        name: Target dataset repository; the UI asks for
            ``username/repo_name``.
        token: Hugging Face access token passed to ``TranscriptDataset.upload``.

    Returns:
        A message containing the URL of the dataset page on the Hub.
    """
    ds = TranscriptDataset(name)

    params = {
        "model_size": model_size,
        "write": write,
        "number_videos": 500,
    }
    # Passed positionally as the third argument of generate_dataset, exactly
    # like the original call. Presumably replaces any existing data -- confirm
    # against TranscriptDataset.generate_dataset.
    overwrite = True
    ds.generate_dataset(url, download_path, overwrite, params)
    ds.upload(token)

    return f"Dataset created at : https://huggingface.co/datasets/{name}"


yt_input = gr.Textbox(label="Youtube Link")
name_input = gr.Textbox(
    label="Dataset Name",
    placeholder="Enter in the format username/repo_name",
)
# The token grants write access to the user's account, so mask it on screen.
token_input = gr.Textbox(
    label="HF Token",
    placeholder="Write access token",
    type="password",
)
repo_output = gr.Textbox(label="Outcome")

iface = gr.Interface(
    fn=dataset,
    inputs=[yt_input, name_input, token_input],
    outputs=repo_output,
    title="Create Transcription Dataset for Youtube using OpenAI Whisper !",
    description=(
        "Create a HuggingFace repository for Youtube Transcripts! "
        "You need to specify a write token obtained in https://hf.co/settings/token. "
        "This Space is a an experimental demo."
    ),
    # Explicit escapes: a plain quoted string cannot span physical lines.
    article="\n\nFind your write token at token settings\n\n",
)

iface.launch()