File size: 2,516 Bytes
31a2efa
7bcf8d4
31a2efa
 
 
 
 
 
 
 
 
 
 
 
3bcfb9d
31a2efa
 
 
d99ef6c
 
8f0b2e6
ff54a19
31a2efa
d99ef6c
31a2efa
 
 
 
 
 
 
ff54a19
31a2efa
7bcf8d4
31a2efa
 
 
 
 
 
8b3f196
 
 
 
 
 
6cf7481
 
 
8b3f196
 
d310dd9
8b3f196
31a2efa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a878fb
 
31a2efa
dd10fee
31a2efa
 
25df624
31a2efa
 
8b3f196
31a2efa
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import unicodedata
from datasets import load_dataset, Audio
from transformers import pipeline
import gradio as gr

############### HF ###########################

# Hugging Face access token taken from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Flagging callback built from the token and the "Urdu-ASR-flags" dataset name.
hf_writer = gr.HuggingFaceDatasetSaver(
    HF_TOKEN,
    "Urdu-ASR-flags",
)

############## DVC ################################

# Path handed to `dvc pull` below and used as the model location by the
# speech-recognition pipeline.
Model = "Model"

# Only attempt a pull when the working directory carries DVC metadata.
if os.path.isdir(".dvc"):
    print("Running DVC")
    # Disabled setup steps, kept for reference:
    # os.system("dvc config cache.type copy")
    # os.system("dvc config core.no_scm true")
    # os.system("git init")
    if os.system(f"dvc pull {Model} -r origin") != 0:
        # Raise SystemExit directly: the bare `exit` helper is injected by the
        # `site` module and is not available under `python -S` or in frozen
        # builds, where it would fail with NameError instead of exiting.
        raise SystemExit("dvc pull failed")
    # os.system("rm -r .dvc")
# .apt/usr/lib/dvc

############## Inference ##############################


# Lazily-built pipeline shared by every request; None until first use.
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the speech-recognition pipeline, building it on first use."""
    global _asr_pipeline
    if _asr_pipeline is None:
        _asr_pipeline = pipeline("automatic-speech-recognition", model=Model)
    return _asr_pipeline


def asr(audio):
    """Transcribe a recording and return the text in Unicode NFC form.

    Args:
        audio: The recording handed over by the Gradio audio input (configured
            with ``type="filepath"``), or None when nothing was recorded.

    Returns:
        The ``"text"`` field of the pipeline prediction, NFC-normalized, or an
        empty string when ``audio`` is None.
    """
    # The audio input is optional, so an empty submission arrives as None;
    # answer with an empty transcript instead of failing inside the pipeline.
    if audio is None:
        return ""

    # Reuse one pipeline instead of reloading the model on every request.
    recognizer = _get_asr_pipeline()
    prediction = recognizer(audio, chunk_length_s=5, stride_length_s=1)
    return unicodedata.normalize("NFC", prediction["text"])


################### Gradio Web APP ################################

# Heading shown at the top of the web page.
title = "Urdu Automatic Speech Recognition"

# HTML blurb rendered below the title.
description = """
<p>
<center>
This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
</center>
</p>
<center>
<img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
</center>
"""

# HTML footer: source link, reference blog post and visitor badge.
# The hugging-face emoji is written as an escape (\U0001F917) so it cannot be
# mangled by a wrong file encoding again; the stray closing </center> and </p>
# tags that had no matching opener were removed.
article = (
    "<p style='text-align: center'>"
    "<a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>Source Code on DagsHub</a>"
    "</p>"
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>"
    "Fine-tuning XLS-R for Multi-Lingual ASR with \U0001F917 Transformers</a>"
    "</p>"
    "<center>"
    "<img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA' alt='visitor badge'>"
    "</center>"
)

# Sample recordings offered as one-click examples (one input value per row).
examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]


# Optional microphone recorder; the recording is delivered as a file path.
Input = gr.inputs.Audio(
    label="Please Record Your Voice",
    optional=True,
    type="filepath",
    source="microphone",
)

# Text box that displays the transcription.
Output = gr.outputs.Textbox(
    label="Urdu Script",
)


def main():
    """Assemble the Gradio interface around ``asr`` and launch it with queueing on."""
    # Text and look of the page.
    presentation = {
        "title": title,
        "description": description,
        "article": article,
        "theme": "grass",
    }
    # Manual flagging, stored through the Hugging Face dataset callback.
    flagging = {
        "flagging_options": ["incorrect", "worst", "ambiguous"],
        "allow_flagging": "manual",
        "flagging_callback": hf_writer,
    }

    demo = gr.Interface(
        fn=asr,
        inputs=Input,
        outputs=Output,
        examples=examples,
        **presentation,
        **flagging,
    )
    demo.launch(enable_queue=True)


# enable_queue=True,auth=("admin", "pass1234")

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    main()