GIanlucaRub committed on
Commit
023302c
·
1 Parent(s): 799a761

Update app.py

Browse files

Added support for YouTube videos

Files changed (1) hide show
  1. app.py +67 -11
app.py CHANGED
@@ -1,18 +1,74 @@
1
- from transformers import pipeline
2
  import gradio as gr
 
 
 
 
3
 
4
- pipe = pipeline(model="GIanlucaRub/whisper-tiny-it-4") # change to "your-username/the-name-you-picked"
5
 
6
- def transcribe(audio):
 
 
 
 
 
 
 
7
  text = pipe(audio)["text"]
8
  return text
9
 
10
- iface = gr.Interface(
11
- fn=transcribe,
12
- inputs=gr.Audio(source="microphone", type="filepath"),
13
- outputs="text",
14
- title="Whisper Tiny Italian",
15
- description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- iface.launch()
 
 
1
  import gradio as gr
2
+ from pytube import YouTube
3
+ from transformers import pipeline
4
+ import os
5
+
6
 
7
+ pipe = pipeline(model="GIanlucaRub/whisper-tiny-it-6") # change to "your-username/the-name-you-picked"
8
 
9
+ def transcribe_yt(link):
10
+ yt = YouTube(link)
11
+ audio = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
12
+ text = pipe(audio)["text"]
13
+ os.remove(audio)
14
+ return text
15
+
16
+ def transcribe_audio(audio):
17
  text = pipe(audio)["text"]
18
  return text
19
 
20
+ def populate_metadata(link):
21
+ yt = YouTube(link)
22
+ return yt.thumbnail_url, yt.title
23
+
24
+ title="Youtube Whisperer"
25
+ description="Speech to text transcription of Youtube videos using OpenAI's Whisper"
26
+ block = gr.Blocks()
27
+
28
+ with block:
29
+ gr.HTML(
30
+ """
31
+ <div style="text-align: center; max-width: 500px; margin: 0 auto;">
32
+ <div>
33
+ <h1>Youtube Whisperer</h1>
34
+ </div>
35
+ <p style="margin-bottom: 10px; font-size: 94%">
36
+ Speech to text transcription of Youtube videos using OpenAI's Whisper
37
+ </p>
38
+ </div>
39
+ """
40
+ )
41
+ with gr.Group():
42
+ with gr.Box():
43
+ text = gr.Textbox(
44
+ label="Transcription",
45
+ placeholder="Transcription Output",
46
+ lines=5)
47
+
48
+ microphone=gr.Audio(source="microphone", type="filepath")
49
+
50
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
51
+ btn_microphone = gr.Button("Transcribe microphone audio")
52
+
53
+ audio_uploaded=gr.Audio(source="upload", type="filepath")
54
+
55
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
56
+ btn_audio_uploaded = gr.Button("Transcribe audio uploaded")
57
+
58
+ link = gr.Textbox(label="YouTube Link")
59
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
60
+ btn_youtube = gr.Button("Transcribe Youtube video")
61
+
62
+ with gr.Row().style(mobile_collapse=False, equal_height=True):
63
+ title = gr.Label(label="Video Title", placeholder="Title")
64
+ img = gr.Image(label="Thumbnail")
65
+
66
+
67
+
68
+ # Events
69
+ btn_youtube.click(transcribe_yt, inputs=[link], outputs=[text])
70
+ btn_microphone.click(transcribe_audio, inputs=[microphone], outputs=[text])
71
+ btn_audio_uploaded.click(transcribe_audio, inputs=[audio_uploaded], outputs=[text])
72
+ link.change(populate_metadata, inputs=[link], outputs=[img, title])
73
 
74
+ block.launch(debug=True)