Issamohammed committed on
Commit
7705134
·
verified ·
1 Parent(s): 4f1a68a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -48
app.py CHANGED
@@ -1,55 +1,21 @@
1
- import torch
2
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
- import gradio as gr
4
  from pydub import AudioSegment
5
- import os
6
-
7
- # Set device and precision for CPU
8
- device = "cpu"
9
- torch_dtype = torch.float32
10
-
11
- # Load KB-Whisper model (Large variant)
12
- model_id = "KBLab/kb-whisper-large"
13
-
14
- model = AutoModelForSpeechSeq2Seq.from_pretrained(
15
- model_id, torch_dtype=torch_dtype
16
- ).to(device)
17
-
18
- processor = AutoProcessor.from_pretrained(model_id)
19
-
20
- pipe = pipeline(
21
- "automatic-speech-recognition",
22
- model=model,
23
- tokenizer=processor.tokenizer,
24
- feature_extractor=processor.feature_extractor,
25
- device=device,
26
- torch_dtype=torch_dtype,
27
- )
28
 
29
  def transcribe(audio_path):
30
- # Handle m4a or other formats by converting to wav
31
- base, ext = os.path.splitext(audio_path)
32
- if ext.lower() != ".wav":
33
- try:
34
- sound = AudioSegment.from_file(audio_path)
35
- audio_converted_path = base + ".converted.wav"
36
- sound.export(audio_converted_path, format="wav")
37
- audio_path = audio_converted_path
38
- except Exception as e:
39
- return f"Error converting audio: {str(e)}"
40
-
41
- # Transcribe
42
  try:
 
 
 
 
 
 
 
 
 
 
 
43
  result = pipe(audio_path, chunk_length_s=30, generate_kwargs={"task": "transcribe", "language": "sv"})
44
  return result["text"]
 
45
  except Exception as e:
46
- return f"Transcription failed: {str(e)}"
47
-
48
- # Build Gradio interface
49
- gr.Interface(
50
- fn=transcribe,
51
- inputs=gr.Audio(type="filepath", label="Upload Swedish Audio"),
52
- outputs=gr.Textbox(label="Transcribed Text"),
53
- title="KB-Whisper Transcriber (Swedish, Free CPU)",
54
- description="Upload .m4a, .mp3, or .wav files. Transcribes Swedish speech using KBLab's Whisper Large model.",
55
- ).launch()
 
 
 
 
1
  from pydub import AudioSegment
2
+ import mimetypes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def transcribe(audio_path):
 
 
 
 
 
 
 
 
 
 
 
 
5
  try:
6
+ # Detect file type using MIME or extension
7
+ mime_type, _ = mimetypes.guess_type(audio_path)
8
+ ext = os.path.splitext(audio_path)[1].lower()
9
+
10
+ if mime_type == "audio/mp4" or ext == ".m4a":
11
+ print("Converting .m4a to .wav...")
12
+ sound = AudioSegment.from_file(audio_path, format="m4a")
13
+ converted_path = audio_path.replace(".m4a", ".converted.wav")
14
+ sound.export(converted_path, format="wav")
15
+ audio_path = converted_path
16
+
17
  result = pipe(audio_path, chunk_length_s=30, generate_kwargs={"task": "transcribe", "language": "sv"})
18
  return result["text"]
19
+
20
  except Exception as e:
21
+ return f"Error during transcription: {str(e)}"