anzorq committed on
Commit
eaed2c2
1 Parent(s): 0c872e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -7
app.py CHANGED
@@ -3,6 +3,7 @@ import gradio as gr
3
  import torch
4
  import torchaudio
5
  from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
 
6
 
7
  model = AutoModelForCTC.from_pretrained("anzorq/w2v-bert-2.0-kbd")
8
  processor = Wav2Vec2BertProcessor.from_pretrained("anzorq/w2v-bert-2.0-kbd")
@@ -40,11 +41,43 @@ def transcribe_speech(audio):
40
 
41
  return pred_text
42
 
43
- interface = gr.Interface(
44
- fn=transcribe_speech,
45
- inputs=gr.Audio(sources="microphone", type="filepath"),
46
- outputs="text",
47
- live=True,
48
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- interface.launch()
 
3
  import torch
4
  import torchaudio
5
  from transformers import AutoModelForCTC, Wav2Vec2BertProcessor
6
+ import yt_dlp
7
 
8
  model = AutoModelForCTC.from_pretrained("anzorq/w2v-bert-2.0-kbd")
9
  processor = Wav2Vec2BertProcessor.from_pretrained("anzorq/w2v-bert-2.0-kbd")
 
41
 
42
  return pred_text
43
 
44
@spaces.GPU
def transcribe_from_youtube(url):
    """Download the audio track of a YouTube video and transcribe it.

    The audio is fetched with yt-dlp, converted to WAV and resampled to
    16000 Hz by the ffmpeg post-processor, and then handed to
    ``transcribe_speech``.

    Args:
        url: URL of the video whose audio should be transcribed.

    Returns:
        Whatever ``transcribe_speech`` returns for the downloaded file.

    Raises:
        gr.Error: If ``url`` is empty or only whitespace.
        Exception: Download/conversion errors raised by yt-dlp propagate
            to the caller unchanged.
    """
    import os
    import tempfile

    if not url or not url.strip():
        raise gr.Error("Please enter a YouTube video URL.")

    # Work in a fresh directory per call. A fixed output path is unsafe:
    # yt-dlp does not overwrite an existing output file by default, so a
    # later request would re-use (and transcribe) the audio of an earlier
    # one, and concurrent requests would clobber each other's file.
    with tempfile.TemporaryDirectory() as work_dir:
        ydl_opts = {
            'format': 'bestaudio/best',
            # %(ext)s lets yt-dlp name the raw download by its real
            # container; the post-processor then writes the .wav next to it.
            'outtmpl': os.path.join(work_dir, 'downloaded_audio.%(ext)s'),
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
                'preferredquality': '192',
            }],
            'postprocessor_args': ['-ar', '16000'],  # Ensure audio is at 16000 Hz
            'prefer_ffmpeg': True,
            # For URLs that reference both a video and a playlist,
            # download only the video.
            'noplaylist': True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url.strip()])

        # Transcribe before leaving the block: the directory (and the
        # audio file in it) is deleted on exit.
        audio_path = os.path.join(work_dir, 'downloaded_audio.wav')
        return transcribe_speech(audio_path)
65
+
66
# Two-tab UI: one tab transcribes a microphone recording, the other
# transcribes the audio of a YouTube video given its URL.
with gr.Blocks() as demo:
    with gr.Tab("Microphone Input"):
        gr.Markdown("## Transcribe speech from microphone")
        # `sources` (plural, list of allowed inputs) is the Gradio 4 keyword;
        # the older `source=` spelling is rejected with a TypeError there.
        mic_audio = gr.Audio(sources=["microphone"], type="filepath", label="Speak into your microphone")
        mic_button = gr.Button("Transcribe")
        mic_output = gr.Textbox(label="Transcription")

        mic_button.click(fn=transcribe_speech, inputs=mic_audio, outputs=mic_output)

    with gr.Tab("YouTube URL"):
        gr.Markdown("## Transcribe speech from YouTube video")
        youtube_url = gr.Textbox(label="Enter YouTube video URL")
        # Distinct names per tab so the microphone tab's components are not
        # shadowed by the ones created here.
        youtube_button = gr.Button("Transcribe")
        youtube_output = gr.Textbox(label="Transcription")

        youtube_button.click(fn=transcribe_from_youtube, inputs=youtube_url, outputs=youtube_output)

demo.launch()