Spaces: Running on Zero
alan committed
Commit • 6cd713c • 1 Parent(s): 4dcbad1

update gradio
README.md CHANGED

@@ -4,7 +4,7 @@ emoji: 🔥
 colorFrom: yellow
 colorTo: blue
 sdk: gradio
-sdk_version:
+sdk_version: 4.39.0
 app_file: app.py
 pinned: false
 license: apache-2.0
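The README front matter now pins the Space to Gradio 4.39.0. A minimal sanity check, as a sketch (the constant below simply mirrors the pinned value from this commit; it is not part of the Space's code), is to compare the installed package version against the pin at startup:

```python
# Minimal sketch: verify the runtime Gradio version matches the sdk_version
# pinned in README.md by this commit ("4.39.0").
import gradio as gr

PINNED_SDK_VERSION = "4.39.0"

if gr.__version__ != PINNED_SDK_VERSION:
    raise RuntimeError(
        f"Expected Gradio {PINNED_SDK_VERSION}, found {gr.__version__}; "
        "update sdk_version in README.md or the installed package."
    )
```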
app.py CHANGED

@@ -4,6 +4,7 @@ import tempfile
 from math import floor
 from typing import Optional, List, Dict, Any
 
+import spaces
 import torch
 import gradio as gr
 import yt_dlp as youtube_dl
@@ -26,6 +27,7 @@ else:
     torch_dtype = torch.float32
     device = "cpu"
     model_kwargs = {}
+print(device)
 # define the pipeline
 pipe = pipeline(
     model=MODEL_NAME,
@@ -35,7 +37,7 @@ pipe = pipeline(
     device=device,
     model_kwargs=model_kwargs,
     trust_remote_code=True
-)
+).to(device)
 
 
 def format_time(start: Optional[float], end: Optional[float]):
@@ -53,6 +55,7 @@ def format_time(start: Optional[float], end: Optional[float]):
     return f"[{_format_time(start)}-> {_format_time(end)}]:"
 
 
+@spaces.GPU
 def get_prediction(inputs, prompt: Optional[str]):
     generate_kwargs = {"language": "japanese", "task": "transcribe"}
     if prompt:
@@ -123,8 +126,8 @@ demo = gr.Blocks()
 mf_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
-        gr.
-        gr.
+        gr.Audio(sources="microphone", type="filepath", optional=True),
+        gr.Textbox(lines=1, placeholder="Prompt", optional=True),
     ],
     outputs=["text", "text"],
     layout="horizontal",
@@ -137,8 +140,8 @@ mf_transcribe = gr.Interface(
 file_transcribe = gr.Interface(
     fn=transcribe,
     inputs=[
-        gr.
-        gr.
+        gr.Audio(sources="upload", type="filepath", optional=True, label="Audio file"),
+        gr.Textbox(lines=1, placeholder="Prompt", optional=True),
     ],
     outputs=["text", "text"],
     layout="horizontal",
@@ -150,8 +153,8 @@ file_transcribe = gr.Interface(
 yt_transcribe = gr.Interface(
     fn=yt_transcribe,
     inputs=[
-        gr.
-        gr.
+        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
+        gr.Textbox(lines=1, placeholder="Prompt", optional=True),
     ],
     outputs=["html", "text", "text"],
     layout="horizontal",
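Taken together, the app.py changes move the Space onto the ZeroGPU pattern (import spaces, decorate the inference entry point with @spaces.GPU) and onto Gradio 4 input components (gr.Audio with sources=, gr.Textbox). The sketch below illustrates that pattern in a reduced form, assuming a generic Whisper-style ASR checkpoint; the checkpoint name, the single-tab demo wiring, and the simplified transcribe function are illustrative placeholders, not the Space's actual app.py.

```python
# Minimal ZeroGPU + Gradio 4 sketch of the pattern this commit adopts.
# Assumptions: MODEL_NAME below is a placeholder checkpoint, prompt handling
# is omitted, and the three-tab Blocks demo is reduced to one gr.Interface.
from typing import Optional

import spaces                     # Hugging Face ZeroGPU helper
import torch
import gradio as gr
from transformers import pipeline

MODEL_NAME = "openai/whisper-large-v3"   # placeholder, not the Space's model

device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device=device,
)


@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of this call
def transcribe(audio_path: str, prompt: Optional[str]):
    # The real app builds prompt_ids from `prompt`; that step is omitted here.
    generate_kwargs = {"language": "japanese", "task": "transcribe"}
    result = pipe(audio_path, generate_kwargs=generate_kwargs)
    return result["text"]


demo = gr.Interface(
    fn=transcribe,
    inputs=[
        # Gradio 4 components: `sources=` replaces the old `source=`,
        # and the per-component `optional=` keyword no longer exists.
        gr.Audio(sources=["microphone", "upload"], type="filepath"),
        gr.Textbox(lines=1, placeholder="Prompt", label="Prompt"),
    ],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()
```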