Spaces:

kingabzpro
/

Urdu-ASR-SOTA

Runtime error

File size: 2,299 Bytes

31a2efa
7bcf8d4
31a2efa
 
 
c14e7f9
31a2efa
 
 
 
 
 
 
350c5f7
31a2efa
aea8191
 
 
 
31a2efa
 
 
1696f82
31a2efa
 
ff54a19
1696f82
7bcf8d4
31a2efa
 
 
 
 
 
8b3f196
 
 
 
11fc882
6cf7481
 
11fc882
8b3f196
 
d310dd9
8b3f196
31a2efa
 
 
c5a7581
31a2efa
 
 
 
c5a7581
 
31a2efa
 
 
 
8a878fb
 
31a2efa
 
 
25df624
31a2efa
 
400f40d
31a2efa

import os
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
import torch

############### HF ###########################

# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# Flagging callback handed to the Gradio interface; it is configured with the
# token above and the dataset name "Urdu-ASR-flags".
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")

############## DagsHub ################################

# Model identifier passed to the transformers pipeline for inference.
Model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
# The DagsHub streaming hooks below are disabled: they stopped working after
# Hugging Face completely changed its git server.
# from dagshub.streaming import install_hooks
# install_hooks()
############## Inference ##############################


# Lazily-built speech-recognition pipeline, shared by every request so the
# model is loaded once instead of on each call.
_ASR_PIPELINE = None


def _get_asr_pipeline():
    """Return the shared ASR pipeline, constructing it on first use."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model=Model)
    return _ASR_PIPELINE


def asr(audio):
    """Transcribe a recording and return the text in NFC-normalized form.

    Args:
        audio: The audio input forwarded to the speech-recognition pipeline
            (the interface in this file supplies a file path), or ``None``
            when nothing was recorded.

    Returns:
        The recognized text normalized to Unicode NFC, or an empty string
        when ``audio`` is ``None``.
    """
    # Submitting without a recording yields None; there is nothing to
    # transcribe, so skip loading/running the model entirely.
    if audio is None:
        return ""

    recognizer = _get_asr_pipeline()
    prediction = recognizer(audio, chunk_length_s=30)
    return unicodedata.normalize("NFC", prediction["text"])


################### Gradio Web APP ################################

# Heading passed to the Gradio interface as its title.
title = "Urdu Automatic Speech Recognition"

# HTML blurb passed to the interface as its description: model provenance
# plus the cover image.
description = """
<p>
<center>
This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.

<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
</center>
</p>
"""

# HTML footer passed to the interface as its article: source-code link,
# fine-tuning blog link, and the visitor badge. Every opening tag has a
# matching closing tag (the earlier version carried a stray </center> and a
# trailing unmatched </p>).
article = (
    "<p style='text-align: center'>"
    "<a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>"
    "Source Code on DagsHub</a>"
    "</p>"
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>"
    "Fine-tuning XLS-R for Multi-Lingual ASR with 🤗 Transformers</a>"
    "</p>"
    "<center>"
    "<img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA'"
    " alt='visitor badge'>"
    "</center>"
)

# Example inputs passed to the interface; one single-element row per audio file.
examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]


# Audio input component: configured for microphone capture with the
# "filepath" value type.
# NOTE(review): `source=` is the Gradio 3.x keyword; newer Gradio releases
# expect `sources=[...]` instead — confirm against the pinned Gradio version.
Input = gr.Audio(
    source="microphone",
    type="filepath",
    label="Please Record Your Voice",
)
# Text output component that displays the transcription.
Output = gr.Textbox(label="Urdu Script")


def main():
    """Build the Gradio interface around ``asr`` and start serving it.

    The interface uses the module-level input/output components, manual
    flagging through ``hf_writer``, and the title, description, article and
    examples defined in this file.
    """
    iface = gr.Interface(
        fn=asr,
        inputs=Input,
        outputs=Output,
        title=title,
        description=description,
        article=article,
        examples=examples,
        allow_flagging="manual",
        flagging_callback=hf_writer,
        theme='JohnSmith9982/small_and_pretty',
    )

    # Queueing is enabled through `.queue()`: the `enable_queue` keyword of
    # `launch()` is deprecated and rejected by newer Gradio releases.
    iface.queue()
    iface.launch()


# Launch options tried previously, kept for reference: enable_queue=True, auth=("admin", "pass1234")

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()