asahi417 committed on
Commit
a72cb3f
·
1 Parent(s): 18fb4f3
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -10,7 +10,7 @@ from transformers.pipelines.audio_utils import ffmpeg_read
10
 
11
 
12
  # configuration
13
- MODEL_NAME = "japanese-asr/distil-whisper-bilingual-v1.0"
14
  BATCH_SIZE = 16
15
  CHUNK_LENGTH_S = 15
16
  # device setting
@@ -52,9 +52,7 @@ def format_time(start: Optional[float], end: Optional[float]):
52
 
53
  @spaces.GPU
54
  def get_prediction(inputs, task: str, language: Optional[str]):
55
- generate_kwargs = {"task": task}
56
- if language:
57
- generate_kwargs['language'] = language
58
  prediction = pipe(inputs, return_timestamps=True, generate_kwargs=generate_kwargs)
59
  text = "".join([c['text'] for c in prediction['chunks']])
60
  text_timestamped = "\n".join([
@@ -64,7 +62,6 @@ def get_prediction(inputs, task: str, language: Optional[str]):
64
 
65
 
66
  def transcribe(inputs: str, task: str, language: str):
67
- language = None if language == "none" else language
68
  if inputs is None:
69
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
70
  with open(inputs, "rb") as f:
@@ -83,9 +80,8 @@ mf_transcribe = gr.Interface(
83
  fn=transcribe,
84
  inputs=[
85
  gr.Audio(sources="microphone", type="filepath"),
86
- gr.Textbox(lines=1, placeholder="Prompt"),
87
- gr.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
88
- gr.Radio(["none", "ja", "en"], label="Language", default="none")
89
  ],
90
  outputs=["text", "text"],
91
  title=title,
@@ -96,9 +92,8 @@ file_transcribe = gr.Interface(
96
  fn=transcribe,
97
  inputs=[
98
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
99
- gr.Textbox(lines=1, placeholder="Prompt"),
100
- gr.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
101
- gr.Radio(["none", "ja", "en"], label="Language", default="none")
102
  ],
103
  outputs=["text", "text"],
104
  title=title,
 
10
 
11
 
12
  # configuration
13
+ MODEL_NAME = "kotoba-tech/kotoba-whisper-bilingual-v1.0"
14
  BATCH_SIZE = 16
15
  CHUNK_LENGTH_S = 15
16
  # device setting
 
52
 
53
  @spaces.GPU
54
  def get_prediction(inputs, task: str, language: Optional[str]):
55
+ generate_kwargs = {"task": task, "language": language}
 
 
56
  prediction = pipe(inputs, return_timestamps=True, generate_kwargs=generate_kwargs)
57
  text = "".join([c['text'] for c in prediction['chunks']])
58
  text_timestamped = "\n".join([
 
62
 
63
 
64
  def transcribe(inputs: str, task: str, language: str):
 
65
  if inputs is None:
66
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
67
  with open(inputs, "rb") as f:
 
80
  fn=transcribe,
81
  inputs=[
82
  gr.Audio(sources="microphone", type="filepath"),
83
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
84
+ gr.Radio(["ja", "en"], label="Output Language", value="ja")
 
85
  ],
86
  outputs=["text", "text"],
87
  title=title,
 
92
  fn=transcribe,
93
  inputs=[
94
  gr.Audio(sources="upload", type="filepath", label="Audio file"),
95
+ gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
96
+ gr.Radio(["ja", "en"], label="Output Language", value="ja")
 
97
  ],
98
  outputs=["text", "text"],
99
  title=title,