Spaces:
Running
Running
File size: 2,516 Bytes
31a2efa 7bcf8d4 31a2efa 3bcfb9d 31a2efa d99ef6c 8f0b2e6 ff54a19 31a2efa d99ef6c 31a2efa ff54a19 31a2efa 7bcf8d4 31a2efa 8b3f196 6cf7481 8b3f196 d310dd9 8b3f196 31a2efa 8a878fb 31a2efa dd10fee 31a2efa 25df624 31a2efa 8b3f196 31a2efa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import os
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr
############### HF ###########################
# Hugging Face access token read from the environment; os.getenv returns None
# when the HF_TOKEN variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# Gradio flagging callback (passed as `flagging_callback` in main) bound to the
# "Urdu-ASR-flags" dataset name.
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")

############## DVC ################################
# Local path of the fine-tuned model: it is the target of `dvc pull` below and
# the `model=` argument given to the transformers pipeline in asr().
Model = "Model"

# Only fetch the model through DVC when the checkout carries a .dvc directory.
if os.path.isdir(".dvc"):
    print("Running DVC")
    # os.system("dvc config cache.type copy")
    # os.system("dvc config core.no_scm true")
    # os.system("git init")
    if os.system(f"dvc pull {Model} -r origin") != 0:
        # Raise SystemExit directly instead of calling `exit(...)`: that name
        # is injected by the `site` module for interactive sessions and is not
        # guaranteed to exist in every interpreter configuration.
        raise SystemExit("dvc pull failed")
    # os.system("rm -r .dvc")
# .apt/usr/lib/dvc
############## Inference ##############################
# Speech-recognition pipeline shared by all requests; built on first use by
# _get_pipeline() so the model is not reloaded for every transcription.
_ASR_PIPELINE = None


def _get_pipeline():
    """Return the shared ASR pipeline, constructing it on the first call."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model=Model)
    return _ASR_PIPELINE


def asr(audio):
    """Transcribe a recording and return the text in Unicode NFC form.

    Args:
        audio: Audio input forwarded unchanged to the transformers pipeline
            (the Gradio input component in this file is configured with
            type="filepath"), or None when nothing was recorded.

    Returns:
        The pipeline's "text" prediction normalised with NFC, or an empty
        string when ``audio`` is None.
    """
    # The input component is declared optional, so a submission without a
    # recording arrives as None; return early instead of failing in the model.
    if audio is None:
        return ""
    # Chunked inference: 5 s chunks with a 1 s stride, as in the original call.
    prediction = _get_pipeline()(audio, chunk_length_s=5, stride_length_s=1)
    return unicodedata.normalize("NFC", prediction["text"])
################### Gradio Web APP ################################
# Heading shown at the top of the Gradio page.
title = "Urdu Automatic Speech Recognition"

# HTML blurb rendered under the title (model summary plus cover image).
description = """
<p>
<center>
This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
</center>
</p>
<center>
<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
</center>
"""

# HTML footer: source link, the fine-tuning blog post and a visitor badge.
# Two fixes relative to the previous literal:
#   * the hugging-face emoji in the blog title was mis-encoded; it is written
#     as an escape (U+1F917) so it survives re-encoding of this file;
#   * a stray </center> and a trailing </p> without matching opening tags were
#     removed so the markup is balanced.
article = (
    "<p style='text-align: center'>"
    "<a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>"
    "Source Code on DagsHub</a>"
    "</p>"
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>"
    "Fine-tuning XLS-R for Multi-Lingual ASR with \U0001F917 Transformers</a>"
    "</p>"
    "<center>"
    "<img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA'"
    " alt='visitor badge'>"
    "</center>"
)

# Example inputs passed to gr.Interface(examples=...); one audio path per row.
examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]

# Microphone input delivered to asr() as a file path.
# NOTE(review): optional=True presumably lets the form be submitted without a
# recording, in which case asr() would receive None - confirm against the
# pinned Gradio version.
Input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    optional=True,
    label="Please Record Your Voice",
)

# Text box that displays the transcription returned by asr().
Output = gr.outputs.Textbox(label="Urdu Script")
def main():
    """Assemble the Gradio interface around asr() and start serving it."""
    # Page presentation: texts, example inputs and theme.
    page_options = dict(
        title=title,
        description=description,
        article=article,
        examples=examples,
        theme="grass",
    )
    # Manual flagging with three labels, recorded through hf_writer.
    flag_options = dict(
        flagging_options=["incorrect", "worst", "ambiguous"],
        allow_flagging="manual",
        flagging_callback=hf_writer,
    )
    demo = gr.Interface(asr, Input, Output, **page_options, **flag_options)
    demo.launch(enable_queue=True)
    # enable_queue=True,auth=("admin", "pass1234")


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|