File size: 891 Bytes
9f5d540
 
 
93c6594
9f5d540
 
 
5afd83b
9f5d540
 
9f13d0c
153e956
1e401c5
153e956
1e401c5
ddf950c
 
9f13d0c
93c6594
9f13d0c
 
 
ddf950c
11dabbc
93c6594
9f5d540
93c6594
9f5d540
 
9f13d0c
9f5d540
5358b5a
93c6594
98b2436
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import gradio as gr
from transformers import pipeline

# Build the speech-recognition pipeline once, at import time, and keep it in
# a module-level name so it is not rebuilt on every call.
asr = pipeline(
    "automatic-speech-recognition",
    model="vhdm/whisper-large-fa-v1",
    device=-1,  # -1 runs on CPU; set device=0 for GPU
)

def transcribe(audio_file):
    """Transcribe an audio file to text with the module-level ASR pipeline.

    Args:
        audio_file: Path to the audio file to transcribe (recorded from the
            microphone or uploaded). Any falsy value such as ``None`` or
            ``""`` is treated as "no audio was provided".

    Returns:
        The recognized text; the fixed notice ``"No audio input detected."``
        when no audio was given; or an ``"ASR error: ..."`` message when the
        recognizer raises.
    """
    if not audio_file:
        return "No audio input detected."

    try:
        # Chunking parameters are forwarded to the pipeline: 30 s chunks with
        # a 5 s stride on each side.
        prediction = asr(audio_file, chunk_length_s=30, stride_length_s=[5, 5])
    except Exception as err:
        # Best-effort: hand the failure back as text rather than raising.
        return f"ASR error: {err}"
    else:
        # Kept outside the guarded region so only recognizer failures are
        # turned into an error message.
        return prediction.get("text", "")

# Web UI: a single audio input (file path handed to `transcribe`) and a
# plain-text output.
iface = gr.Interface(
    title="Persian ASR",
    description=" Speak in Persian or upload an audio file.",
    fn=transcribe,
    inputs=gr.Audio(label="Record or upload audio", type="filepath"),
    outputs="text",
)

# Start the app only when this file is executed directly, not on import.
if __name__ == "__main__":
    iface.launch()