shideqin nshmyrevgmail committed on
Commit
75be352
0 Parent(s):

Duplicate from alphacep/asr

Browse files

Co-authored-by: Nickolay V. Shmyrev <nshmyrevgmail@users.noreply.huggingface.co>

Files changed (5) hide show
  1. .gitattributes +27 -0
  2. README.md +14 -0
  3. app.py +121 -0
  4. packages.txt +1 -0
  5. requirements.txt +1 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Automatic Speech Recognition
3
+ emoji: 🌍
4
+ colorFrom: magenta
5
+ colorTo: magenta
6
+ sdk: gradio
7
+ sdk_version: 3.0.26
8
+ app_file: app.py
9
+ pinned: true
10
+ license: apache-2.0
11
+ duplicated_from: alphacep/asr
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import sys
3
+ import gradio as gr
4
+ import vosk
5
+ import json
6
+ import subprocess
7
+
8
# Send log records to stdout with a timestamped format.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Maps the language label shown in the UI to the Vosk model to load for it.
# NOTE: the Uzbek entry uses a "small" model id despite this mapping's name.
LARGE_MODEL_BY_LANGUAGE = {
    "Russian": {"model_id": "vosk-model-ru-0.42"},
    "Chinese": {"model_id": "vosk-model-cn-0.22"},
    "English": {"model_id": "vosk-model-en-us-0.22"},
    "French": {"model_id": "vosk-model-fr-0.22"},
    "German": {"model_id": "vosk-model-de-0.22"},
    "Italian": {"model_id": "vosk-model-it-0.22"},
    "Japanese": {"model_id": "vosk-model-ja-0.22"},
    "Hindi": {"model_id": "vosk-model-hi-0.22"},
    "Persian": {"model_id": "vosk-model-fa-0.5"},
    "Uzbek": {"model_id": "vosk-model-small-uz-0.22"},
}

# Alphabetically ordered language labels (iterating a dict yields its keys).
LANGUAGES = sorted(LARGE_MODEL_BY_LANGUAGE)

# Model instances already loaded in this process, keyed by model id.
CACHED_MODELS_BY_ID = {}
31
+
32
def asr(model, input_file):
    """Transcribe an audio file with a Vosk model.

    The file is decoded by an ``ffmpeg`` child process into 16 kHz mono
    signed 16-bit little-endian PCM on its stdout, which is fed to a
    ``vosk.KaldiRecognizer`` in 4000-byte chunks.

    Args:
        model: Loaded Vosk model handed to the recognizer.
        input_file: Path of the audio file to decode.

    Returns:
        The text of every recognized segment, joined with single spaces.
    """
    rec = vosk.KaldiRecognizer(model, 16000.0)
    results = []

    # Build argv as a list: formatting the path into a string and splitting it
    # on whitespace breaks any path that contains a space.
    command = [
        "ffmpeg", "-loglevel", "quiet",
        "-i", str(input_file),
        "-ar", "16000", "-ac", "1", "-f", "s16le", "-",
    ]

    # The context manager closes the pipe and waits for ffmpeg on exit, so no
    # zombie process or open file descriptor is left behind per request.
    with subprocess.Popen(command, stdout=subprocess.PIPE) as process:
        while True:
            data = process.stdout.read(4000)
            if not data:
                break
            # AcceptWaveform returns true once a full segment is available.
            if rec.AcceptWaveform(data):
                results.append(json.loads(rec.Result())["text"])

    # Flush whatever the recognizer still holds after the stream ended.
    results.append(json.loads(rec.FinalResult())["text"])

    return " ".join(results)
52
+
53
+
54
def run(input_file, language, history):
    """Handle one recognition request and render the session history as HTML.

    Args:
        input_file: Path of the recorded audio, or ``None`` if nothing was
            recorded.
        language: Language label; must be a key of ``LARGE_MODEL_BY_LANGUAGE``.
        history: List of earlier result dicts for this session, or a falsy
            value when there are none yet.

    Returns:
        A ``(html_output, history)`` tuple: the rendered HTML for all history
        items and the updated history list.
    """
    # Local import keeps this block self-contained; used to escape rendered text.
    import html

    logger.info(f"Running ASR for {language} for {input_file}")

    history = history or []

    model = LARGE_MODEL_BY_LANGUAGE.get(language)

    if model is None:
        history.append({
            "error_message": f"Failed to find a model for {language} language :("
        })
    elif input_file is None:
        history.append({
            "error_message": "Record input audio first"
        })
    else:
        model_id = model["model_id"]

        # Load each model at most once per process; later requests reuse it.
        model_instance = CACHED_MODELS_BY_ID.get(model_id)
        if model_instance is None:
            model_instance = vosk.Model(model_name=model_id)
            CACHED_MODELS_BY_ID[model_id] = model_instance

        transcription = asr(model_instance, input_file)

        logger.info(f"Transcription for {input_file}: {transcription}")

        history.append({
            "model_id": model_id,
            "language": language,
            "transcription": transcription,
            "error_message": None
        })

    # `language` comes from the request and is echoed in the error message, so
    # every piece of text is escaped before being placed into the markup.
    parts = ["<div class='result'>"]
    for item in history:
        if item["error_message"] is not None:
            message = html.escape(item["error_message"], quote=False)
            parts.append(
                f"<div class='result_item result_item_error'>{message}</div>"
            )
        else:
            text = html.escape(item["transcription"], quote=False)
            parts.append("<div class='result_item result_item_success'>")
            parts.append(f"{text}<br/>")
            parts.append("</div>")
    parts.append("</div>")

    return "".join(parts), history
98
+
99
+
100
# Widgets feeding `run`: a microphone recording (passed as a file path),
# the language choice, and the per-session state holding the history.
_interface_inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", label="Record something..."),
    gr.inputs.Radio(label="Language", choices=LANGUAGES),
    "state",
]

# `run` returns the rendered HTML and the updated session state.
_interface_outputs = [
    gr.outputs.HTML(label="Outputs"),
    "state",
]

# Styling for the result bubbles emitted by `run`.
_interface_css = """
.result {display:flex;flex-direction:column}
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
.result_item_error {background-color:#ff7070;color:white;align-self:start}
"""

_interface = gr.Interface(
    run,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
    title="Automatic Speech Recognition",
    description="",
    css=_interface_css,
    allow_flagging="never",
    theme="default",
)

# Start the app with request queueing enabled.
_interface.launch(enable_queue=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ vosk