File size: 1,096 Bytes
44ccb70
76dce3d
 
44ccb70
76dce3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
import os
from dotenv import load_dotenv

# Load variables from a .env file into the environment before the token
# is looked up below.
load_dotenv()

# Hugging Face access token; None when the variable is not set.
HF_API_KEY = os.environ.get("HF_API_KEY")

# Earlier approach, kept for reference:
# gr.Interface.load("models/pyannote/speaker-diarization").launch()

# Setup required before the pretrained pipeline can be instantiated:
#   1. accept the user conditions at hf.co/pyannote/speaker-diarization
#   2. accept the user conditions at hf.co/pyannote/segmentation
#   3. create an access token at hf.co/settings/tokens
from pyannote.audio import Pipeline

# Module-level diarization pipeline shared by every request.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization@2.1", use_auth_token=HF_API_KEY
)


def transcribe_audio(audio):
    """Run speaker diarization on ``audio`` and return the turns as text.

    Despite its name, this function does not produce a transcript: it applies
    the module-level ``pipeline`` to the input and reports each speaker turn.

    Args:
        audio: Input handed unchanged to ``pipeline``.

    Returns:
        A string with one newline-terminated line per track, formatted as
        ``start=<t>s stop=<t>s speaker_<label>`` with times shown to one
        decimal place. Empty when the diarization yields no tracks.
    """
    diarization = pipeline(audio)
    # Join once instead of growing a string inside the loop.
    return "".join(
        f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}\n"
        for turn, _, speaker in diarization.itertracks(yield_label=True)
    )


# Web UI: upload an audio file and get the diarization text back.
# type="filepath" makes Gradio pass the callback a path to the uploaded file.
# The default ("numpy") would pass a (sample_rate, ndarray) tuple instead,
# which the pyannote pipeline does not accept as input.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
)

iface.launch()