mikelalda committed on
Commit
84f0996
1 Parent(s): a16fc31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -21
app.py CHANGED
@@ -1,10 +1,8 @@
1
  import gradio as gr
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  import torch
4
- import requests
5
- import os
6
 
7
- model_id = "distil-whisper/distil-large-v2"
8
 
9
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
10
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -17,26 +15,27 @@ model.to(device)
17
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
18
  processor = AutoProcessor.from_pretrained(model_id)
19
 
20
- pipe = pipeline(
21
- "automatic-speech-recognition",
22
- model=model,
23
- tokenizer=processor.tokenizer,
24
- feature_extractor=processor.feature_extractor,
25
- max_new_tokens=128,
26
- torch_dtype=torch_dtype,
27
- device=device,
28
- )
29
 
30
  def transcribe_audio(audio_file):
31
- recorded_filename = audio_file.name
32
- if os.path.exists(recorded_filename):
33
- results = pipe(recorded_filename)
34
- return results["text"]
35
- else:
36
- return "Error: No audio file uploaded."
37
-
38
- inputs = gr.Audio(sources="upload", type="filepath")
 
 
 
 
 
 
 
39
  outputs = gr.Textbox()
40
 
41
- interface = gr.Interface(fn=transcribe_audio, inputs=inputs, outputs=outputs, title="Audio Transcription App")
 
 
42
  interface.launch()
 
1
  import gradio as gr
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  import torch
 
 
4
 
5
+ model_id = "distil-whisper/distil-large-v3"
6
 
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
15
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
16
  processor = AutoProcessor.from_pretrained(model_id)
17
 
18
+
 
 
 
 
 
 
 
 
19
 
20
  def transcribe_audio(audio_file):
21
+ pipe = pipeline(
22
+ "automatic-speech-recognition",
23
+ model=model,
24
+ tokenizer=processor.tokenizer,
25
+ feature_extractor=processor.feature_extractor,
26
+ max_new_tokens=128,
27
+ torch_dtype=torch_dtype,
28
+ device=device,
29
+ )
30
+ results = pipe(audio_file)
31
+ return results["text"]
32
+
33
+ inputs = [
34
+ gr.Audio(sources="upload", type="filepath"),
35
+ ]
36
  outputs = gr.Textbox()
37
 
38
+ interface = gr.Interface(
39
+ fn=transcribe_audio, inputs=inputs, outputs=outputs, title="Audio Transcription App"
40
+ )
41
  interface.launch()