Eldermind committed on
Commit
fa2d48a
1 Parent(s): a525656

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -18
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import gradio as gr
2
- from pydub import AudioSegment
3
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
4
  import torch
5
 
@@ -10,24 +9,20 @@ model = Wav2Vec2ForCTC.from_pretrained(model_id)
10
  model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
11
 
12
def transcribe(file_path):
    """Transcribe an audio file to text with the module-level Wav2Vec2 model.

    Args:
        file_path: Path to an audio file in any format pydub/ffmpeg can read,
            as supplied by the Gradio audio component.

    Returns:
        The decoded transcription string (greedy CTC decoding).
    """
    import numpy as np  # local import: only needed on this code path

    # BUG FIX: the previous code passed the raw PCM *bytes* of the samples to
    # the processor. Wav2Vec2's feature extractor expects a 1-D float
    # waveform; wav2vec2 checkpoints are trained on mono 16 kHz audio, so
    # convert before feature extraction.
    audio = AudioSegment.from_file(file_path)
    audio = audio.set_channels(1).set_frame_rate(16000)

    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    # Normalize signed-integer PCM to [-1.0, 1.0] based on the sample width.
    samples /= float(1 << (8 * audio.sample_width - 1))

    # Prepare the waveform for the model.
    input_values = processor(
        samples, return_tensors="pt", sampling_rate=16000
    ).input_values
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_values = input_values.to(device)

    # Perform the prediction without tracking gradients (inference only).
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decode: most likely token per frame, then collapse repeats.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    return transcription
31
 
32
  # Gradio interface setup
33
  with gr.Blocks() as demo:
@@ -39,4 +34,4 @@ with gr.Blocks() as demo:
39
 
40
  audio_input.change(transcribe, inputs=audio_input, outputs=audio_output)
41
 
42
- demo.launch(share=True)
 
1
  import gradio as gr
 
2
  from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
3
  import torch
4
 
 
9
  model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
10
 
11
def transcribe(file_path):
    """Transcribe a speech recording to text with the module-level Wav2Vec2 model.

    Args:
        file_path: Path to a WAV file as provided by the Gradio audio
            component.

    Returns:
        The decoded transcription string, or "Transcription error" if
        loading/inference fails (best-effort: the error is printed, not
        raised, so the Gradio callback never crashes).
    """
    try:
        # Local imports: only needed on this code path.
        import numpy as np
        from scipy.io import wavfile
        from scipy.signal import resample

        # BUG FIX: the previous code called processor.audio_file_to_array(),
        # which does not exist on Wav2Vec2Processor — every call raised
        # AttributeError and the app always returned "Transcription error".
        # Load the waveform explicitly instead.
        sampling_rate, audio = wavfile.read(file_path)

        # Collapse stereo to mono and convert integer PCM to float32 in
        # [-1.0, 1.0], as the feature extractor expects.
        audio = audio.astype(np.float32)
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        # wav2vec2 checkpoints expect 16 kHz input — resample if needed.
        target_rate = 16000
        if sampling_rate != target_rate:
            n_samples = int(round(len(audio) * target_rate / sampling_rate))
            audio = resample(audio, n_samples)
            sampling_rate = target_rate

        input_values = processor(
            audio, sampling_rate=sampling_rate, return_tensors="pt"
        ).input_values
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        input_values = input_values.to(device)

        # Inference only — no gradient tracking.
        with torch.no_grad():
            logits = model(input_values).logits

        # Greedy CTC decode: most likely token per frame, collapse repeats.
        predicted_ids = torch.argmax(logits, dim=-1)
        return processor.batch_decode(predicted_ids)[0]
    except Exception as e:
        # Best-effort: surface a friendly message in the UI instead of
        # crashing the Gradio event handler.
        print(f"Error during transcription: {e}")
        return "Transcription error"
 
 
 
26
 
27
  # Gradio interface setup
28
  with gr.Blocks() as demo:
 
34
 
35
  audio_input.change(transcribe, inputs=audio_input, outputs=audio_output)
36
 
37
+ demo.launch()