# Hugging Face Spaces app (Space status when captured: Sleeping)
| import os | |
| os.environ["TRANSFORMERS_NO_TF"] = "1" | |
| from transformers import pipeline | |
| import gradio as gr | |
| from evaluate import load | |
# Word-error-rate scorer, loaded once at import time and shared by all requests.
wer_metric = load("wer")

# Build every ASR pipeline up front so the models can be compared side by side.
# Keys are the display labels; values are the ready-to-call pipelines.
models = {
    label: pipeline(task="automatic-speech-recognition", model=checkpoint)
    for label, checkpoint in (
        ("Wav2Vec2", "Devion333/wav2vec2-xls-r-300m-dv"),
        ("Whisper small", "Devion333/whisper-small-dv-syn"),
    )
}
def transcribe(audio, chosen_models, reference):
    """Transcribe *audio* with each selected model and optionally score WER.

    Args:
        audio: Audio input handed to each ASR pipeline (the UI supplies a
            file path), or ``None`` when nothing was recorded or uploaded.
        chosen_models: Iterable of keys into the module-level ``models``
            dict. ``None`` or an empty selection yields an empty result.
        reference: Optional ground-truth transcript. ``None``, empty, or
            whitespace-only text disables WER scoring.

    Returns:
        A dict mapping each model name to ``{"prediction": <text>}``, with
        an additional ``"WER"`` entry (rounded to three decimals) when a
        reference transcript was supplied.

    Raises:
        gr.Error: If at least one model is selected but no audio was given.
    """
    selected = list(chosen_models or [])
    if not selected:
        return {}

    # Fail with a readable UI message instead of an opaque pipeline error.
    if audio is None:
        raise gr.Error("Please upload or record audio before transcribing.")

    # Normalise the reference once; it is identical for every model.
    # Lower-casing both sides keeps the WER comparison case-insensitive.
    reference_text = (reference or "").strip().lower()

    results = {}
    for model_name in selected:
        prediction = models[model_name](audio)["text"]
        entry = {"prediction": prediction}
        if reference_text:
            wer = wer_metric.compute(
                predictions=[prediction.lower()],
                references=[reference_text],
            )
            entry["WER"] = round(wer, 3)
        results[model_name] = entry
    return results
# Gradio front end: an audio input, a model picker, and an optional reference
# transcript are passed to `transcribe`; its dict result is rendered as JSON.
demo = gr.Interface(
    title="ASR Model Comparison",
    description=(
        "Upload or record audio, select ASR models, and compare their "
        "transcriptions. Optionally, provide a reference transcript to "
        "calculate WER."
    ),
    fn=transcribe,
    inputs=[
        gr.Audio(
            label="Upload or Record Speech",
            type="filepath",
            sources=["microphone", "upload"],
        ),
        gr.CheckboxGroup(
            label="Choose Models to Compare",
            choices=list(models),
            value=["Wav2Vec2"],
        ),
        gr.Textbox(label="Reference Transcript (optional)"),
    ],
    outputs=gr.JSON(label="Transcriptions & Statistics"),
)

if __name__ == "__main__":
    # Start the web UI only when executed as a script, not on import.
    demo.launch()