"""Gradio web app for automatic speech recognition of Urdu audio.

The app records audio from the microphone, transcribes it with a
wav2vec2 checkpoint through the ``transformers`` ASR pipeline, and shows
the NFC-normalized text. Flagged samples are saved to a Hugging Face
dataset when an access token is available in the environment.
"""
import functools
import os
import unicodedata

from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch

############### HF ###########################

# The access token is read from the environment (e.g. a Space secret).
# Never commit a token to source control; a token that has been committed
# must be revoked and regenerated.
HF_TOKEN = os.getenv("HF_TOKEN")

# Flagged samples are pushed to this dataset. Without a token the writer
# cannot authenticate, so it is left unset and flagging is disabled in main().
hf_writer = (
    gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags") if HF_TOKEN else None
)

############## DagsHub ################################

Model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
# NOTE: DagsHub streaming is disabled; it stopped working after Hugging Face
# changed its git server.
# from dagshub.streaming import install_hooks
# install_hooks()

############## Inference ##############################


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Build the ASR pipeline once and reuse it for every request."""
    return pipeline("automatic-speech-recognition", model=Model)


def asr(audio):
    """Transcribe an audio recording to Urdu text.

    Args:
        audio: Path of the recorded audio file as supplied by the Gradio
            ``Audio`` input (``type="filepath"``), or ``None`` when nothing
            was recorded.

    Returns:
        The transcription normalized to Unicode NFC, or an empty string
        when no audio was provided.
    """
    if audio is None:
        # Submitting without a recording yields None; there is nothing to do.
        return ""
    # Long recordings are processed in 30-second chunks.
    prediction = _get_pipeline()(audio, chunk_length_s=30)
    return unicodedata.normalize("NFC", prediction["text"])


################### Gradio Web APP ################################

# logo
title = "Automatic Speech Recognition System for Urdu Language"

description = """

"""

article = "\n\nVisit for more info\n\n"

examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"]]

Input = gr.Audio(
    source="microphone",
    type="filepath",
    label="Please Record Your Voice",
)
Output = gr.Textbox(label="Urdu Script")


def main():
    """Build the Gradio interface and start the web server."""
    if hf_writer is not None:
        flagging = {"allow_flagging": "manual", "flagging_callback": hf_writer}
    else:
        # No token available: run the demo without flagging instead of failing.
        flagging = {"allow_flagging": "never"}

    iface = gr.Interface(
        asr,
        Input,
        Output,
        title=title,
        description=description,
        article=article,
        examples=examples,
        theme='sketch',
        **flagging,
    )
    # Alternative theme: 'JohnSmith9982/small_and_pretty'.
    # Basic auth can be enabled with launch(auth=(user, password)); load the
    # credentials from the environment rather than hard-coding them.
    iface.launch(enable_queue=True)


if __name__ == "__main__":
    main()