Spaces:
Running
on
Zero
Running
on
Zero
fix
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
|
|
10 |
|
11 |
|
12 |
# configuration
|
13 |
-
MODEL_NAME = "
|
14 |
BATCH_SIZE = 16
|
15 |
CHUNK_LENGTH_S = 15
|
16 |
# device setting
|
@@ -52,9 +52,7 @@ def format_time(start: Optional[float], end: Optional[float]):
|
|
52 |
|
53 |
@spaces.GPU
|
54 |
def get_prediction(inputs, task: str, language: Optional[str]):
|
55 |
-
generate_kwargs = {"task": task}
|
56 |
-
if language:
|
57 |
-
generate_kwargs['language'] = language
|
58 |
prediction = pipe(inputs, return_timestamps=True, generate_kwargs=generate_kwargs)
|
59 |
text = "".join([c['text'] for c in prediction['chunks']])
|
60 |
text_timestamped = "\n".join([
|
@@ -64,7 +62,6 @@ def get_prediction(inputs, task: str, language: Optional[str]):
|
|
64 |
|
65 |
|
66 |
def transcribe(inputs: str, task: str, language: str):
|
67 |
-
language = None if language == "none" else language
|
68 |
if inputs is None:
|
69 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
70 |
with open(inputs, "rb") as f:
|
@@ -83,9 +80,8 @@ mf_transcribe = gr.Interface(
|
|
83 |
fn=transcribe,
|
84 |
inputs=[
|
85 |
gr.Audio(sources="microphone", type="filepath"),
|
86 |
-
gr.
|
87 |
-
gr.Radio(["
|
88 |
-
gr.Radio(["none", "ja", "en"], label="Language", default="none")
|
89 |
],
|
90 |
outputs=["text", "text"],
|
91 |
title=title,
|
@@ -96,9 +92,8 @@ file_transcribe = gr.Interface(
|
|
96 |
fn=transcribe,
|
97 |
inputs=[
|
98 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
99 |
-
gr.
|
100 |
-
gr.Radio(["
|
101 |
-
gr.Radio(["none", "ja", "en"], label="Language", default="none")
|
102 |
],
|
103 |
outputs=["text", "text"],
|
104 |
title=title,
|
|
|
10 |
|
11 |
|
12 |
# configuration
|
13 |
+
MODEL_NAME = "kotoba-tech/kotoba-whisper-bilingual-v1.0"
|
14 |
BATCH_SIZE = 16
|
15 |
CHUNK_LENGTH_S = 15
|
16 |
# device setting
|
|
|
52 |
|
53 |
@spaces.GPU
|
54 |
def get_prediction(inputs, task: str, language: Optional[str]):
|
55 |
+
generate_kwargs = {"task": task, "language": language}
|
|
|
|
|
56 |
prediction = pipe(inputs, return_timestamps=True, generate_kwargs=generate_kwargs)
|
57 |
text = "".join([c['text'] for c in prediction['chunks']])
|
58 |
text_timestamped = "\n".join([
|
|
|
62 |
|
63 |
|
64 |
def transcribe(inputs: str, task: str, language: str):
|
|
|
65 |
if inputs is None:
|
66 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
67 |
with open(inputs, "rb") as f:
|
|
|
80 |
fn=transcribe,
|
81 |
inputs=[
|
82 |
gr.Audio(sources="microphone", type="filepath"),
|
83 |
+
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
84 |
+
gr.Radio(["ja", "en"], label="Output Language", value="ja")
|
|
|
85 |
],
|
86 |
outputs=["text", "text"],
|
87 |
title=title,
|
|
|
92 |
fn=transcribe,
|
93 |
inputs=[
|
94 |
gr.Audio(sources="upload", type="filepath", label="Audio file"),
|
95 |
+
gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
|
96 |
+
gr.Radio(["ja", "en"], label="Output Language", value="ja")
|
|
|
97 |
],
|
98 |
outputs=["text", "text"],
|
99 |
title=title,
|