File size: 1,515 Bytes
68bbd23
 
 
 
 
26f19ab
68bbd23
 
 
 
3219217
68bbd23
3219217
68bbd23
3219217
68bbd23
 
 
 
 
 
 
3219217
518d64e
a3182ab
3219217
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import whisper
from pytube import YouTube
import gradio as gr
import os

# Load the Whisper "medium" checkpoint once at module import, so every
# transcription request reuses the same in-memory model.
model = whisper.load_model("medium")

def get_audio(url):
  """Download the audio-only stream of a YouTube video to the current directory.

  Args:
    url: Full URL of the YouTube video.

  Returns:
    Path of the downloaded file, renamed with an ``.mp3`` extension.

  Raises:
    ValueError: If the video exposes no audio-only stream.
  """
  yt = YouTube(url)
  video = yt.streams.filter(only_audio=True).first()
  if video is None:
    # .first() returns None when no stream matches; fail with a clear
    # message instead of an opaque AttributeError on the next line.
    raise ValueError(f"No audio-only stream available for {url!r}")
  out_file = video.download(output_path=".")
  base, _ = os.path.splitext(out_file)
  new_file = base + '.mp3'
  # NOTE: this only swaps the file extension — no transcoding happens.
  # Whisper decodes the container via ffmpeg regardless of the suffix.
  # os.replace (unlike os.rename) overwrites an existing target on
  # Windows, so re-downloading the same video does not crash.
  os.replace(out_file, new_file)
  return new_file

def get_text(url):
  """Return the Whisper transcription text for the YouTube video at *url*."""
  audio_path = get_audio(url)
  transcription = model.transcribe(audio_path)
  return transcription['text']

# Build the Gradio UI: a URL textbox, a transcription textbox, and a button
# wired to get_text. Blocking launch so the script keeps serving requests.
with gr.Blocks() as demo:
  # Header and usage notes shown above the controls.
  gr.Markdown("<h1><center>Youtube video transcription with OpenAI's Whisper</center></h1>")
  gr.Markdown("<center>Enter the link of any youtube video to get the transcription of the video in the form of text.</center>")
  gr.Markdown("<center>This uses the whisper 'medium' model. It's very slow so only use it if other spaces in huggingface are not sufficient for your video. It takes around 9X the video length to generate the transcription.</center>")
  gr.Markdown('<center>You can also use something like <a href="https://colab.research.google.com/github/ArthurFDLR/whisper-youtube/blob/main/whisper_youtube.ipynb">Google Colab</a> to run the model much faster.</center>')
  with gr.Row():
    input_text_1 = gr.Textbox(placeholder='Enter the Youtube video URL', label='URL')
    output_text_1 = gr.Textbox(placeholder='Transcription of the video', label='Transcription')
  result_button_1 = gr.Button('Get Transcription')

  # Clicking the button runs the full download + transcribe pipeline.
  result_button_1.click(get_text, inputs=input_text_1, outputs=output_text_1)
demo.launch(debug=True)