Mei000 committed on
Commit
e344fd6
1 Parent(s): fe06869

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -53
app.py CHANGED
@@ -1,63 +1,50 @@
 
1
  from transformers import pipeline
2
  import gradio as gr
3
  from pytube import YouTube
4
 
5
  pipe = pipeline(model="Mei000/whisper-small-sv-SE")
6
 
7
- class GradioInference():
8
- def __init__(self):
9
- self.yt = None
10
- self.loaded_model = pipe
11
 
12
- def __call__(self, link):
13
- if self.yt is None:
14
- self.yt = YouTube(link)
15
- path = self.yt.streams.filter(only_audio=True)[0].download(filename="tmp.mp4")
16
- results_text = self.loaded_model(path)["text"]
17
- return results_text
18
 
19
- def transcribe(audio):
20
- text = pipe(audio)["text"]
21
- return text
22
 
23
- def populate_metadata(self, link):
24
- self.yt = YouTube(link)
25
- return self.yt.thumbnail_url, self.yt.title
26
 
27
- gio = GradioInference()
28
- fn=transcribe,
29
- title="Whisper Small Swedishr",
30
- description="Demo for Swedish speech recognition using a fine-tuned Whisper small model.",
31
-
32
- block = gr.Blocks()
33
- with block:
34
- gr.HTML(
35
- """
36
- <div style="text-align: center; max-width: 500px; margin: 0 auto;">
37
- <div>
38
- <h1>Youtube Whisperer</h1>
39
- </div>
40
- <p style="margin-bottom: 10px; font-size: 94%">
41
- Speech to text transcription of Youtube videos using OpenAI's Whisper
42
- </p>
43
- </div>
44
- """
45
- )
46
-
47
-
48
- with gr.Group():
49
- with gr.Box():
50
- with gr.Row().style(equal_height=True):
51
- inputs=gr.Audio(source="microphone", type="filepath")
52
- out = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
53
- with gr.Row().style(equal_height=True):
54
- link = gr.Textbox(label="YouTube Link")
55
- title = gr.Label(label="Video Title")
56
- with gr.Row().style(equal_height=True):
57
- img = gr.Image(label="Thumbnail")
58
- text = gr.Textbox(label="Transcription", placeholder="Transcription Output", lines=10)
59
- with gr.Row().style(equal_height=True):
60
- btn = gr.Button("Transcribe")
61
- btn.click(gio, inputs=[link], outputs=[text])
62
- link.change(gio.populate_metadata, inputs=[link], outputs=[img, title])
63
- block.launch()
 
1
+ from jax._src.custom_derivatives import linear_call
2
  from transformers import pipeline
3
  import gradio as gr
4
  from pytube import YouTube
5
 
6
  pipe = pipeline(model="Mei000/whisper-small-sv-SE")
7
 
 
 
 
 
8
 
9
def link_transcribe(link):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link: URL of the YouTube video to transcribe.

    Returns:
        The ``"text"`` field of the speech-recognition pipeline result, or an
        empty string when no link was supplied.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    import tempfile

    if not link or not link.strip():
        # Nothing to download; avoid handing an empty URL to pytube.
        return ""

    audio_stream = YouTube(link.strip()).streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {link!r}")

    # A per-request temporary directory keeps concurrent requests from
    # overwriting each other's "tmp.mp4" and removes the download once the
    # transcription is done.
    with tempfile.TemporaryDirectory() as workdir:
        path = audio_stream.download(output_path=workdir, filename="tmp.mp4")
        return pipe(path)["text"]
 
 
13
 
14
def transcribe(audio):
    """Transcribe a recorded audio clip with the speech-recognition pipeline.

    Args:
        audio: Value handed over by the audio input; presumably a file path,
            or ``None`` when nothing has been recorded yet.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is no audio to transcribe.
    """
    if not audio:
        # Submitting without a recording would otherwise pass None to the
        # pipeline and surface as an opaque error.
        return ""
    return pipe(audio)["text"]
17
 
18
def populate_metadata(link):
    """Look up the thumbnail URL and title of a YouTube video.

    Args:
        link: URL of the YouTube video.

    Returns:
        A ``(thumbnail_url, title)`` tuple, or ``(None, None)`` when the link
        is empty or pytube cannot resolve it.
    """
    if not link or not link.strip():
        return None, None

    # pytube is already a dependency of this module; the exception type is
    # imported here so the file-level imports stay untouched.
    from pytube.exceptions import PytubeError

    try:
        video = YouTube(link.strip())
        return video.thumbnail_url, video.title
    except PytubeError:
        # Half-typed or unsupported URLs are expected while the user is still
        # editing the link, so clear the preview instead of raising.
        return None, None
21
 
22
# UI definition: one section for microphone transcription and one for
# transcribing a YouTube video from its link.
# NOTE(review): the nesting below was reconstructed from a whitespace-stripped
# diff; confirm it against the intended layout.
with gr.Blocks() as demo:
    # The loaded model is a Swedish (sv-SE) Whisper fine-tune, so the heading
    # names Swedish rather than Cantonese.
    gr.Markdown("Whisper-Small Swedish Recognition")

    # --- Microphone input ---
    with gr.Row():
        with gr.TabItem("Record from Microphone"):
            record_file = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record from microphone",
            )
            record_button = gr.Button("Submit")
            record_outputs = [
                gr.Textbox(label="Recognized result from Microphone"),
            ]

    # --- YouTube input ---
    with gr.Row().style(equal_height=True):
        link = gr.Textbox(label="YouTube Link")
        title = gr.Label(label="Video Title")
    with gr.Row().style(equal_height=True):
        img = gr.Image(label="Thumbnail")
        youtube_outputs = [
            gr.Textbox(
                label="Transcription",
                placeholder="Transcription Output",
                lines=10,
            ),
        ]
    with gr.Row().style(equal_height=True):
        youtube_button = gr.Button("Submit")

    # --- Event wiring ---
    record_button.click(fn=transcribe, inputs=record_file, outputs=record_outputs)
    youtube_button.click(fn=link_transcribe, inputs=link, outputs=youtube_outputs)
    # populate_metadata is a module-level function; the former `gio` instance
    # no longer exists, so referencing it raised NameError at startup.
    link.change(populate_metadata, inputs=[link], outputs=[img, title])

demo.launch()