import gradio as gr
from transformers import pipeline
from rapidfuzz import process, fuzz

# Build the speech-recognition pipeline once at import time; `transcribe`
# reuses this single module-level instance for every call.
# NOTE(review): the model id suggests a Whisper checkpoint fine-tuned for
# Persian -- confirm against the model card.
asr = pipeline(
    task="automatic-speech-recognition",
    model="vhdm/whisper-large-fa-v1",
    device=-1  # CPU; set device=0 for GPU
)

# Custom vocabulary used for post-correction of transcripts.
# Each key is the canonical spelling; its list holds the surface forms that
# `replace_fuzzy` looks for in the text and rewrites to that key. The
# canonical spelling is itself listed first among its own alternatives.
custom_vocab_map = {
    "نرد": ["نرد", "نِرد", "نَرد"],
    "کامپیوتر": ["کامپیوتر", "کامپیوتره"],
    "هوش مصنوعی": ["هوش مصنوعی", "هوش صنعتی"],
    "ماشین": ["ماشین", "ماشینه"]
}

def replace_fuzzy(text, vocab_map, threshold=85):
    """
    Rewrite known variant spellings in ``text`` to their canonical form.

    For every ``target -> alternatives`` entry in ``vocab_map``, each
    alternative other than ``target`` itself is scored against the whole
    text with ``fuzz.partial_ratio``. Every alternative scoring at least
    ``threshold`` is then substituted by ``target`` with a literal
    ``str.replace``. Because the substitution is literal, only alternatives
    that occur verbatim in the text actually change it.

    Args:
        text: Transcript to correct.
        vocab_map: Mapping of canonical form to an iterable of variant
            strings (the canonical form may be listed among them).
        threshold: Minimum score (0-100) an alternative needs to be
            substituted.

    Returns:
        The corrected text. Empty input is returned unchanged.
    """
    if not text:
        return text

    for target, alternatives in vocab_map.items():
        # Replacing the canonical form by itself is a no-op, and keeping it
        # as a candidate is harmful: when it is a substring of a variant
        # (e.g. "ماشین" inside "ماشینه") it ties at the top score and used to
        # win the single-best lookup, so the variant was never corrected.
        # Empty strings are dropped because str.replace("", x) would insert
        # x between every character.
        candidates = [alt for alt in alternatives if alt and alt != target]
        if not candidates:
            continue

        # Score every candidate instead of only the single best one, so all
        # variants present in the text are corrected.
        scored = process.extract(
            text, candidates, scorer=fuzz.partial_ratio, limit=None
        )
        accepted = [choice for choice, score, *_ in scored if score >= threshold]

        # Longest first, so a short variant cannot clobber part of a longer
        # variant that contains it.
        for variant in sorted(accepted, key=len, reverse=True):
            text = text.replace(variant, target)
    return text

def transcribe(audio_file):
    """
    Transcribe an audio file and apply the custom-vocabulary correction.

    audio_file: path to the audio file handed over by the Gradio audio
        component (microphone recording or upload); may be empty/None.

    Returns the corrected transcript, or a human-readable message when no
    audio was supplied or the ASR call raised.
    """
    if not audio_file:
        return "No audio input detected."

    try:
        # Long recordings are processed in 30 s chunks with 5 s of stride
        # on each side.
        asr_output = asr(audio_file, chunk_length_s=30, stride_length_s=[5,5])
    except Exception as err:
        # UI boundary: surface the failure as text instead of crashing.
        return f"ASR error: {err}"
    else:
        raw_text = asr_output.get("text", "")

    return replace_fuzzy(raw_text, custom_vocab_map, threshold=85)

# Gradio interface: a single audio input, passed to `transcribe` as a file
# path (type="filepath"), and a plain-text output that displays whatever
# `transcribe` returns (the transcript or an error/status message).
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Record or upload audio"),
    outputs="text",
    title="Persian ASR with High Accuracy Vocabulary",
    description="""Speak in Persian or upload an audio file; recognized words
are corrected using a custom high-accuracy vocabulary."""
)

# Launch the interface only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    iface.launch()