# Page header captured with this file (not part of the program):
# author kingabzpro; commit "Update Gradio/app.py"; revision 8e54533 (verified).
import os
import numpy as np
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch
############### HF ###########################
# Token is read from the environment; it is None when HF_TOKEN is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Flagging callback that stores flagged samples in the "Urdu-ASR-flags" dataset.
hf_writer = gr.HuggingFaceDatasetSaver(
    hf_token=HF_TOKEN,
    dataset_name="Urdu-ASR-flags",
)

############## Inference ##############################
# Speech-recognition pipeline built once at import time and reused by
# every call to transcribe().
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
)
def transcribe(audio):
    """Transcribe one audio clip with the module-level ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair as delivered by the
            ``gr.Audio`` input, or ``None`` when no audio was supplied.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is nothing to transcribe (no audio, or zero samples).
    """
    # Nothing was recorded/submitted: unpacking None would raise TypeError.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)
    # np.max() raises ValueError on an empty array.
    if y.size == 0:
        return ""

    y = y.astype(np.float32)

    # The model takes single-channel audio. Multi-channel input is averaged
    # down to mono (assumes a (samples, channels) layout -- TODO confirm
    # against the Gradio version in use).
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise, but leave pure silence untouched: dividing by a zero
    # peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]
# Minimal microphone-to-text interface. Nothing in the visible code launches
# or references `demo`; main() builds and launches its own interface.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="text",
)
################### Gradio Web APP ################################
# Static text and sample inputs passed to gr.Interface in main().

# Heading of the web page.
title = "Urdu Automatic Speech Recognition"

# HTML passed as the interface's `description`.
description = """
<p>
<center>
This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
</center>
</p>
"""

# HTML passed as the interface's `article`: source link, tutorial link and
# visitor badge.
# The hugging-face emoji is written as an escape so it survives any file
# re-encoding (it had previously degraded into mojibake), and the stray
# closing </center> and </p> tags were dropped so the markup is balanced.
article = (
    "<p style='text-align: center'>"
    "<a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>Source Code on DagsHub</a>"
    "</p>"
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>"
    "Fine-tuning XLS-R for Multi-Lingual ASR with \U0001f917 Transformers</a>"
    "</p>"
    "<center>"
    "<img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA' alt='visitor badge'>"
    "</center>"
)

# Sample clips offered as examples; each inner list is one example row.
examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]
def main():
    """Build the full Urdu ASR web interface and launch it.

    The interface wires the microphone input to transcribe(), adds the page
    text and example clips defined at module level, and enables manual
    flagging through the module-level ``hf_writer`` callback.
    """
    mic_input = gr.Audio(sources=["microphone"])
    app = gr.Interface(
        fn=transcribe,
        inputs=mic_input,
        outputs="text",
        title=title,
        description=description,
        article=article,
        examples=examples,
        theme='JohnSmith9982/small_and_pretty',
        allow_flagging="manual",
        flagging_callback=hf_writer,
    )
    # Launched with default settings. An earlier note here sketched queueing
    # and basic-auth arguments with literal credentials; if authentication is
    # ever enabled, load the credentials from the environment instead.
    app.launch()


# Only start the web app when this file is executed as a script.
if __name__ == "__main__":
    main()