slapula committed on
Commit
e1a80bb
·
verified ·
1 Parent(s): ccebb5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import pipeline
3
  from transformers.pipelines.audio_utils import ffmpeg_read
4
  import gradio as gr
5
 
6
- MODEL_NAME = "openai/whisper-small"
7
  BATCH_SIZE = 8
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -37,7 +37,7 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
37
  return seconds
38
 
39
 
40
- def transcribe(file, task, return_timestamps):
41
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
42
  text = outputs["text"]
43
  if return_timestamps:
@@ -47,7 +47,11 @@ def transcribe(file, task, return_timestamps):
47
  for chunk in timestamps
48
  ]
49
  text = "\n".join(str(feature) for feature in timestamps)
50
- return text
 
 
 
 
51
 
52
 
53
  demo = gr.Blocks()
@@ -56,12 +60,12 @@ mic_transcribe = gr.Interface(
56
  fn=transcribe,
57
  inputs=[
58
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
59
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
60
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
61
  ],
62
- outputs="text",
63
  layout="vertical",
64
- theme="monochrome",
65
  title="Whisper Demo: Transcribe Audio",
66
  description=(
67
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -75,12 +79,12 @@ file_transcribe = gr.Interface(
75
  fn=transcribe,
76
  inputs=[
77
  gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
78
- gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
79
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
80
  ],
81
- outputs="text",
82
  layout="vertical",
83
- theme="monochrome",
84
  title="Whisper Demo: Transcribe Audio",
85
  description=(
86
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
@@ -96,7 +100,6 @@ file_transcribe = gr.Interface(
96
  )
97
 
98
  with demo:
99
- gr.Column(scale=1)
100
  gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
101
 
102
  demo.launch(enable_queue=True)
 
3
  from transformers.pipelines.audio_utils import ffmpeg_read
4
  import gradio as gr
5
 
6
+ MODEL_NAME = "vinai/PhoWhisper-large"
7
  BATCH_SIZE = 8
8
 
9
  device = 0 if torch.cuda.is_available() else "cpu"
 
37
  return seconds
38
 
39
 
40
+ def transcribe(file, string, return_timestamps):
41
  outputs = pipe(file, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=return_timestamps)
42
  text = outputs["text"]
43
  if return_timestamps:
 
47
  for chunk in timestamps
48
  ]
49
  text = "\n".join(str(feature) for feature in timestamps)
50
+ if text == string:
51
+ grade = "good!"
52
+ else:
53
+ grade = "could use some work..."
54
+ return text, grade
55
 
56
 
57
  demo = gr.Blocks()
 
60
  fn=transcribe,
61
  inputs=[
62
  gr.inputs.Audio(source="microphone", type="filepath", optional=True),
63
+ gr.inputs.Textbox(labels="Word/Phrase"),
64
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
65
  ],
66
+ outputs=["text", "grade"],
67
  layout="vertical",
68
+ theme="huggingface",
69
  title="Whisper Demo: Transcribe Audio",
70
  description=(
71
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
79
  fn=transcribe,
80
  inputs=[
81
  gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
82
+ gr.inputs.Textbox(labels="Word/Phrase"),
83
  gr.inputs.Checkbox(default=False, label="Return timestamps"),
84
  ],
85
+ outputs=["text", "grade"],
86
  layout="vertical",
87
+ theme="huggingface",
88
  title="Whisper Demo: Transcribe Audio",
89
  description=(
90
  "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
 
100
  )
101
 
102
  with demo:
 
103
  gr.TabbedInterface([mic_transcribe, file_transcribe], ["Transcribe Microphone", "Transcribe Audio File"])
104
 
105
  demo.launch(enable_queue=True)