zahoor54321 committed on
Commit
edb20d9
1 Parent(s): a2b4633

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -65
app.py CHANGED
@@ -1,77 +1,24 @@
1
  import os
2
  import unicodedata
3
- from datasets import load_dataset, Audio
4
  from transformers import pipeline
5
  import gradio as gr
6
- import torch
7
-
8
- ############### HF ###########################
9
-
10
- HF_TOKEN = os.getenv("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")
11
-
12
- hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "Urdu-ASR-flags")
13
-
14
- ############## DagsHub ################################
15
-
16
- Model = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"
17
- # This is not working because Huggingface has completely changed the git server.
18
- # from dagshub.streaming import install_hooks
19
- # install_hooks()
20
-
21
- ############## Inference ##############################
22
 
 
 
 
23
 
 
24
  def asr(audio):
25
-
26
- asr = pipeline("automatic-speech-recognition", model=Model)
27
  prediction = asr(audio, chunk_length_s=30)
28
- return unicodedata.normalize("NFC",prediction["text"])
29
-
30
-
31
- ################### Gradio Web APP ################################
32
 
 
33
  title = "Urdu Automatic Speech Recognition"
 
 
 
34
 
35
- description = """
36
- <p>
37
- <center>
38
- This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the common_voice dataset.
39
- <img src="https://huggingface.co/spaces/kingabzpro/Urdu-ASR-SOTA/resolve/main/Images/cover.jpg" alt="logo" width="550"/>
40
- </center>
41
- </p>
42
- """
43
-
44
- article = "<p style='text-align: center'><a href='https://dagshub.com/kingabzpro/Urdu-ASR-SOTA' target='_blank'>Source Code on DagsHub</a></p><p style='text-align: center'><a href='https://huggingface.co/blog/fine-tune-xlsr-wav2vec2' target='_blank'>Fine-tuning XLS-R for Multi-Lingual ASR with 🤗 Transformers</a></p></center><center><img src='https://visitor-badge.glitch.me/badge?page_id=kingabzpro/Urdu-ASR-SOTA' alt='visitor badge'></center></p>"
45
-
46
- examples = [["Sample/sample1.mp3"], ["Sample/sample2.mp3"], ["Sample/sample3.mp3"]]
47
-
48
-
49
- Input = gr.Audio(
50
- source="microphone",
51
- type="filepath",
52
- label="Please Record Your Voice",
53
- )
54
- Output = gr.Textbox(label="Urdu Script")
55
-
56
-
57
- def main():
58
- iface = gr.Interface(
59
- asr,
60
- Input,
61
- Output,
62
- title=title,
63
- allow_flagging="manual",
64
- flagging_callback=hf_writer,
65
- description=description,
66
- article=article,
67
- examples=examples,
68
- theme='JohnSmith9982/small_and_pretty'
69
- )
70
-
71
- iface.launch(enable_queue=True)
72
-
73
-
74
- # enable_queue=True,auth=("admin", "pass1234")
75
 
76
- if __name__ == "__main__":
77
- main()
 
1
  import os
2
  import unicodedata
 
3
  from transformers import pipeline
4
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
# Read the Hugging Face token from the environment (for example a Space
# secret named HF_TOKEN) instead of embedding it in the source file.
# Anything committed to the repository is readable by others, so a token that
# was ever hardcoded here should be revoked and reissued.
# HF_TOKEN is None when the variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
9
 
10
# Define the ASR function
MODEL_ID = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"

# Lazily-created speech-recognition pipeline, shared by every request so the
# model is loaded once instead of on each call.
_ASR_PIPELINE = None


def asr(audio):
    """Transcribe a recorded audio clip and return NFC-normalized text.

    Args:
        audio: The recording handed over by the Gradio audio input. A path
            string is passed to the pipeline as-is; an object exposing a
            ``name`` attribute (such as a temporary-file wrapper) is replaced
            by that attribute. ``None`` means nothing was recorded.

    Returns:
        The transcription normalized to Unicode NFC, or an empty string when
        ``audio`` is ``None``.
    """
    global _ASR_PIPELINE

    # Submitting without recording anything yields no audio at all.
    if audio is None:
        return ""

    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model=MODEL_ID)

    # Temporary-file objects carry their path in ``.name``; plain paths have
    # no such attribute and are used unchanged.
    audio_input = getattr(audio, "name", audio)

    prediction = _ASR_PIPELINE(audio_input, chunk_length_s=30)

    # A single input normally produces one dict; tolerate a one-element list
    # as well so both result shapes are handled.
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]

    return unicodedata.normalize("NFC", prediction["text"])
 
 
 
15
 
16
# Define the Gradio interface
title = "Urdu Automatic Speech Recognition"
description = "This model performs automatic speech recognition for the Urdu language."

# Use the top-level components rather than the deprecated gr.inputs /
# gr.outputs namespaces. type="filepath" hands `asr` a path string, which is
# what the previous revision of this app passed to the pipeline.
# NOTE(review): `source="microphone"` is the Gradio 3.x spelling; confirm the
# pinned Gradio version before upgrading.
input_audio = gr.Audio(source="microphone", type="filepath", label="Record your voice")
output_text = gr.Textbox(label="Transcription")

demo = gr.Interface(
    fn=asr,
    inputs=input_audio,
    outputs=output_text,
    title=title,
    description=description,
)
demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Do not hardcode a real Hugging Face token in this file; supply it through an environment variable or a Space secret instead.