Issamohammed committed on
Commit
fd865b6
·
verified ·
1 Parent(s): 1b6640e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -5
app.py CHANGED
@@ -1,10 +1,14 @@
1
  import torch
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  import gradio as gr
 
 
4
 
5
- device = "cpu" # Free CPU only
 
6
  torch_dtype = torch.float32
7
 
 
8
  model_id = "KBLab/kb-whisper-large"
9
 
10
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
@@ -22,14 +26,30 @@ pipe = pipeline(
22
  torch_dtype=torch_dtype,
23
  )
24
 
25
- def transcribe(audio):
26
- result = pipe(audio, chunk_length_s=30, generate_kwargs={"task": "transcribe", "language": "sv"})
27
- return result["text"]
 
 
 
 
 
 
 
 
28
 
 
 
 
 
 
 
 
 
29
  gr.Interface(
30
  fn=transcribe,
31
  inputs=gr.Audio(type="filepath", label="Upload Swedish Audio"),
32
  outputs=gr.Textbox(label="Transcribed Text"),
33
  title="KB-Whisper Transcriber (Swedish, Free CPU)",
34
- description="Transcribes Swedish audio using KBLab's Whisper Large model. Running on free CPU — may be slow."
35
  ).launch(share=True)
 
1
  import torch
2
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
  import gradio as gr
4
+ from pydub import AudioSegment
5
+ import os
6
 
7
+ # Set device and precision for CPU
8
+ device = "cpu"
9
  torch_dtype = torch.float32
10
 
11
+ # Load KB-Whisper model (Large variant)
12
  model_id = "KBLab/kb-whisper-large"
13
 
14
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
 
26
  torch_dtype=torch_dtype,
27
  )
28
 
29
def transcribe(audio_path):
    """Transcribe an uploaded audio file to Swedish text.

    Non-WAV input is first converted to WAV with pydub, then passed to the
    module-level ``pipe`` speech-recognition pipeline.

    Args:
        audio_path: Filesystem path of the uploaded audio, as supplied by the
            ``gr.Audio(type="filepath")`` input. May be ``None`` or empty when
            the form is submitted without a file.

    Returns:
        The transcribed text on success, otherwise a human-readable error
        message. Failures are reported as strings rather than raised so that
        they show up in the output text box.
    """
    import tempfile  # local import: only the conversion path needs it

    # Nothing was uploaded: bail out before os.path.splitext chokes on None.
    if not audio_path:
        return "No audio file provided."

    converted_path = None
    try:
        # Handle m4a or other formats by converting to wav.
        _, ext = os.path.splitext(audio_path)
        if ext.lower() != ".wav":
            try:
                sound = AudioSegment.from_file(audio_path)
                # Use a unique temp file instead of writing next to the upload,
                # so concurrent requests cannot overwrite each other's output.
                fd, converted_path = tempfile.mkstemp(suffix=".wav")
                os.close(fd)
                sound.export(converted_path, format="wav")
            except Exception as e:
                return f"Error converting audio: {e}"
            audio_path = converted_path

        # Transcribe.
        try:
            result = pipe(
                audio_path,
                chunk_length_s=30,
                generate_kwargs={"task": "transcribe", "language": "sv"},
            )
            return result["text"]
        except Exception as e:
            return f"Transcription failed: {e}"
    finally:
        # Always remove the intermediate WAV; it is only needed for this call.
        if converted_path is not None:
            try:
                os.remove(converted_path)
            except OSError:
                pass
47
+
48
# Build the Gradio interface: one audio upload in, one text box out.
audio_input = gr.Audio(type="filepath", label="Upload Swedish Audio")
text_output = gr.Textbox(label="Transcribed Text")

demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=text_output,
    title="KB-Whisper Transcriber (Swedish, Free CPU)",
    description="Upload .m4a, .mp3, or .wav files. Transcribes Swedish speech using KBLab's Whisper Large model.",
)

# Start the web server; share=True additionally requests a public share link.
demo.launch(share=True)