ASR / Gradio /app.py
zahoor54321's picture
Update Gradio/app.py
078d7bc
import os
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch
############### HF ###########################
# The Hugging Face access token is taken from the environment (for example a
# Space secret named HF_TOKEN) so that no credential is stored in the source.
# NOTE(review): a token was previously hard-coded on this line and is part of
# the repository history -- it should be revoked and replaced.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    # Keep the variable exported for any library code that reads it from the
    # process environment; skipped when unset because os.environ only accepts
    # string values.
    os.environ["HF_TOKEN"] = HF_TOKEN
# Flagged samples are written to the "Urdu-ASR-flags" dataset through this
# callback, which is handed to the Gradio interface as its flagging callback.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")
############## DagsHub ################################
# Identifier of the model passed to the transformers pipeline for inference.
Model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
# The DagsHub streaming hooks below are disabled: they stopped working because
# Hugging Face completely changed its git server.
# from dagshub.streaming import install_hooks
# install_hooks()
############## Inference ##############################
# Speech-recognition pipeline shared by all requests; built on first use so
# the model is loaded once instead of on every call to asr().
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the shared ASR pipeline, creating it the first time it is needed."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline("automatic-speech-recognition", model=Model)
    return _asr_pipeline


def asr(audio):
    """Transcribe an audio recording and return the text in Unicode NFC form.

    Args:
        audio: The audio input forwarded to the transformers pipeline. May be
            None when the form is submitted without a recording.

    Returns:
        The recognized text normalized with ``unicodedata.normalize("NFC", ...)``,
        or an empty string when no audio was supplied.
    """
    # Nothing was recorded: return an empty transcript instead of letting the
    # pipeline raise on a None input.
    if audio is None:
        return ""
    recognizer = _get_asr_pipeline()
    # Long recordings are processed in 30-second chunks.
    prediction = recognizer(audio, chunk_length_s=30)
    return unicodedata.normalize("NFC", prediction["text"])
################### Gradio Web APP ################################
# Cover image markup, currently disabled:
#<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>

# Heading shown for the interface.
title = "Automatic Speech Recognition System for Urdu Language"

# HTML description block; at present only an empty centered paragraph.
description = "\n".join(["", "<p>", "<center>", "</center>", "</p>", ""])

# Footer HTML with a link to further information.
article = (
    "<p style='text-align: center'>"
    "<a href='https://seventick.com/asr-model-for-urdu-language/' target='_blank'>"
    "Visit for more info</a></p>"
)

# One single-input example row per bundled sample clip.
examples = [[clip] for clip in ("Sample/sample1.mp3", "Sample/sample2.mp3")]
# Audio input component: recorded from the microphone and configured with
# type="filepath".
Input = gr.Audio(label="Please Record Your Voice", type="filepath", source="microphone")

# Text output component labelled for the Urdu transcript.
Output = gr.Textbox(label="Urdu Script")
def main():
    """Build the Gradio interface around ``asr`` and launch it."""
    interface_options = dict(
        fn=asr,
        inputs=Input,
        outputs=Output,
        title=title,
        description=description,
        article=article,
        examples=examples,
        theme='sketch',
        # Flagging is triggered manually and recorded through hf_writer.
        allow_flagging="manual",
        flagging_callback=hf_writer,
    )
    demo = gr.Interface(**interface_options)
    demo.launch(enable_queue=True)
    # Alternatives kept for reference:
    #   theme='JohnSmith9982/small_and_pretty'
    #   launch(enable_queue=True, auth=(<username>, <password>))


if __name__ == "__main__":
    main()