palli23 committed on
Commit
5be18fb
·
1 Parent(s): 04cee61

diarization

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -1,10 +1,12 @@
1
- # app.py – Mælendagreining VIRKAR á ZeroGPU (2025 fix)
2
  import os
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline
6
  from pyannote.audio import Pipeline
 
7
  import tempfile
 
8
 
9
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
10
 
@@ -13,13 +15,17 @@ def transcribe_with_diarization(audio_path):
13
  if not audio_path:
14
  return "Hladdu upp hljóðskrá"
15
 
16
- # Mælendagreining 2025 syntax
17
- diarization = Pipeline.from_pretrained(
18
- "pyannote/speaker-diarization-3.1",
19
- token=os.getenv("HF_TOKEN") # FIX
20
- ).to("cuda")
 
 
 
 
21
 
22
- dia_result = diarization(audio_path)
23
 
24
  # Whisper-small
25
  asr = pipeline(
@@ -29,21 +35,22 @@ def transcribe_with_diarization(audio_path):
29
  token=os.getenv("HF_TOKEN")
30
  )
31
 
32
- full_text = ""
33
- for turn, _, speaker in dia_result.itertracks(yield_label=True):
34
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
35
- dia_result.crop(audio_path, turn).export(tmp.name, format="wav")
36
- segment_path = tmp.name
37
 
38
  text = asr(segment_path)["text"].strip()
39
- full_text += f"[MÆLENDI {speaker}] {text}\n"
40
  os.unlink(segment_path)
41
 
42
- return full_text or "Ekkert heyrt"
43
 
 
44
  with gr.Blocks() as demo:
45
  gr.Markdown("# Íslenskt ASR + Mælendagreining")
46
- gr.Markdown("**Whisper-small + pyannote 3.1 · 2025 fix**")
47
 
48
  audio = gr.Audio(type="filepath")
49
  btn = gr.Button("Transcribe með mælendum", variant="primary")
 
1
+ # app.py – FIXED Pyannote UnpicklingError (PyTorch 2.6+ Compatible)
2
  import os
3
  import gradio as gr
4
  import spaces
5
  from transformers import pipeline
6
  from pyannote.audio import Pipeline
7
+ import torch
8
  import tempfile
9
+ from torch.serialization import safe_globals # ← KEY FIX
10
 
11
  MODEL_NAME = "palli23/whisper-small-sam_spjall"
12
 
 
15
  if not audio_path:
16
  return "Hladdu upp hljóðskrá"
17
 
18
+ # FIX: Allowlist blocked globals for PyTorch 2.6+
19
+ with safe_globals([
20
+ torch.torch_version.TorchVersion,
21
+ 'pyannote.audio.core.task.Specifications' # Add if needed
22
+ ]):
23
+ diarization = Pipeline.from_pretrained(
24
+ "pyannote/speaker-diarization-3.1",
25
+ token=os.getenv("HF_TOKEN")
26
+ ).to("cuda")
27
 
28
+ dia = diarization(audio_path)
29
 
30
  # Whisper-small
31
  asr = pipeline(
 
35
  token=os.getenv("HF_TOKEN")
36
  )
37
 
38
+ result = []
39
+ for turn, _, speaker in dia.itertracks(yield_label=True):
40
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
41
+ dia.crop(audio_path, turn).export(f.name, format="wav")
42
+ segment_path = f.name
43
 
44
  text = asr(segment_path)["text"].strip()
45
+ result.append(f"[MÆLENDI {speaker}] {text}")
46
  os.unlink(segment_path)
47
 
48
+ return "\n".join(result) or "Ekkert heyrt"
49
 
50
+ # Interface
51
  with gr.Blocks() as demo:
52
  gr.Markdown("# Íslenskt ASR + Mælendagreining")
53
+ gr.Markdown("**Whisper-small + pyannote 3.1 · Fixed PyTorch 2.6+**")
54
 
55
  audio = gr.Audio(type="filepath")
56
  btn = gr.Button("Transcribe með mælendum", variant="primary")