|
|
import logging

import gradio as gr
from transformers import pipeline
|
|
|
|
|
|
|
|
# Build the speech-recognition pipeline once, at import time, so that every
# request handled by the UI reuses the same loaded model.
asr = pipeline(
    "automatic-speech-recognition",
    model="vhdm/whisper-large-fa-v1",
    device=-1,  # -1 selects the CPU in the transformers pipeline API
)
|
|
|
|
|
def transcribe(audio_file):
    """Transcribe a recorded or uploaded audio clip to text.

    Args:
        audio_file: Filesystem path to the audio clip, as handed over by the
            Gradio audio component (configured with ``type="filepath"``).
            A falsy value (``None`` or an empty string) means that no audio
            was provided.

    Returns:
        The recognised text with surrounding whitespace removed, or a
        human-readable message when no audio was supplied or recognition
        failed. Failures are reported as text instead of being raised so
        that they appear in the UI's output box.
    """
    if not audio_file:
        return "No audio input detected."

    try:
        # Long recordings are processed in 30-second chunks, each with
        # 5 seconds of overlapping context on the left and on the right.
        result = asr(audio_file, chunk_length_s=30, stride_length_s=[5, 5])
    except Exception as e:
        # UI boundary: keep the full traceback in the logs, but show the
        # user a short message rather than an unhandled error.
        logging.getLogger(__name__).exception(
            "ASR failed for %r", audio_file
        )
        return f"ASR error: {e}"

    # A single input is expected to yield a dict with a "text" entry; fall
    # back to an empty string if the result has any other shape or the
    # entry is missing/None, instead of raising outside the try block.
    text = result.get("text") if isinstance(result, dict) else None
    if not isinstance(text, str):
        return ""

    # Whisper-style decoders often prepend a space to the transcript;
    # trim it so the output box shows clean text.
    return text.strip()
|
|
|
|
|
|
|
|
# Web UI definition: a single audio input (microphone recording or file
# upload, delivered to the callback as a file path) wired to `transcribe`,
# with the transcript shown as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or upload audio"),
    outputs="text",
    title="Persian ASR",
    description=" Speak in Persian or upload an audio file.",
)
|
|
|
|
|
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()
|
|
|