Spaces:
Sleeping
Sleeping
Yurii Paniv
committed on
Commit
•
f754acc
1
Parent(s):
9a0bda1
Improve model handling
Browse files
client.py
CHANGED
@@ -2,105 +2,30 @@
|
|
2 |
# -*- coding: utf-8 -*-
|
3 |
from __future__ import absolute_import, division, print_function
|
4 |
|
5 |
-
import argparse
|
6 |
import numpy as np
|
7 |
-
import shlex
|
8 |
-
import subprocess
|
9 |
import sys
|
10 |
import wave
|
11 |
-
import json
|
12 |
|
13 |
from deepspeech import Model, version
|
14 |
from timeit import default_timer as timer
|
15 |
|
16 |
-
try:
|
17 |
-
from shhlex import quote
|
18 |
-
except ImportError:
|
19 |
-
from pipes import quote
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
sox_cmd = 'sox {} --type raw --bits 16 --channels 1 --rate {} --encoding signed-integer --endian little --compression 0.0 --no-dither - '.format(
|
24 |
-
quote(audio_path), desired_sample_rate)
|
25 |
-
try:
|
26 |
-
output = subprocess.check_output(
|
27 |
-
shlex.split(sox_cmd), stderr=subprocess.PIPE)
|
28 |
-
except subprocess.CalledProcessError as e:
|
29 |
-
raise RuntimeError('SoX returned non-zero status: {}'.format(e.stderr))
|
30 |
-
except OSError as e:
|
31 |
-
raise OSError(e.errno, 'SoX not found, use {}hz files or install it: {}'.format(
|
32 |
-
desired_sample_rate, e.strerror))
|
33 |
-
|
34 |
-
return desired_sample_rate, np.frombuffer(output, np.int16)
|
35 |
-
|
36 |
-
|
37 |
-
def metadata_to_string(metadata):
|
38 |
-
return ''.join(token.text for token in metadata.tokens)
|
39 |
-
|
40 |
-
|
41 |
-
def words_from_candidate_transcript(metadata):
|
42 |
-
word = ""
|
43 |
-
word_list = []
|
44 |
-
word_start_time = 0
|
45 |
-
# Loop through each character
|
46 |
-
for i, token in enumerate(metadata.tokens):
|
47 |
-
# Append character to word if it's not a space
|
48 |
-
if token.text != " ":
|
49 |
-
if len(word) == 0:
|
50 |
-
# Log the start time of the new word
|
51 |
-
word_start_time = token.start_time
|
52 |
-
|
53 |
-
word = word + token.text
|
54 |
-
# Word boundary is either a space or the last character in the array
|
55 |
-
if token.text == " " or i == len(metadata.tokens) - 1:
|
56 |
-
word_duration = token.start_time - word_start_time
|
57 |
-
|
58 |
-
if word_duration < 0:
|
59 |
-
word_duration = 0
|
60 |
-
|
61 |
-
each_word = dict()
|
62 |
-
each_word["word"] = word
|
63 |
-
each_word["start_time "] = round(word_start_time, 4)
|
64 |
-
each_word["duration"] = round(word_duration, 4)
|
65 |
-
|
66 |
-
word_list.append(each_word)
|
67 |
-
# Reset
|
68 |
-
word = ""
|
69 |
-
word_start_time = 0
|
70 |
-
|
71 |
-
return word_list
|
72 |
-
|
73 |
-
|
74 |
-
def metadata_json_output(metadata):
|
75 |
-
json_result = dict()
|
76 |
-
json_result["transcripts"] = [{
|
77 |
-
"confidence": transcript.confidence,
|
78 |
-
"words": words_from_candidate_transcript(transcript),
|
79 |
-
} for transcript in metadata.transcripts]
|
80 |
-
return json.dumps(json_result, indent=2)
|
81 |
-
|
82 |
-
|
83 |
-
class VersionAction(argparse.Action):
|
84 |
-
def __init__(self, *args, **kwargs):
|
85 |
-
super(VersionAction, self).__init__(nargs=0, *args, **kwargs)
|
86 |
-
|
87 |
-
def __call__(self, *args, **kwargs):
|
88 |
-
print('DeepSpeech ', version())
|
89 |
-
exit(0)
|
90 |
|
91 |
|
92 |
def client(audio_file, lang="uk"):
|
93 |
model_load_start = timer()
|
94 |
# sphinx-doc: python_ref_model_start
|
95 |
-
|
96 |
if lang not in ["en", "uk"]:
|
97 |
lang = "uk"
|
98 |
if lang == "uk":
|
99 |
-
|
100 |
if lang == "en":
|
101 |
-
|
102 |
-
|
103 |
-
ds =
|
104 |
# sphinx-doc: python_ref_model_stop
|
105 |
model_load_end = timer() - model_load_start
|
106 |
print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
|
|
|
2 |
# -*- coding: utf-8 -*-
|
3 |
from __future__ import absolute_import, division, print_function
|
4 |
|
|
|
5 |
import numpy as np
|
|
|
|
|
6 |
import sys
|
7 |
import wave
|
|
|
8 |
|
9 |
from deepspeech import Model, version
|
10 |
from timeit import default_timer as timer
|
11 |
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
uk_model = Model("./uk.tflite")
|
14 |
+
en_model = Model("./deepspeech-0.7.3-models.tflite")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
def client(audio_file, lang="uk"):
|
18 |
model_load_start = timer()
|
19 |
# sphinx-doc: python_ref_model_start
|
20 |
+
model = uk_model
|
21 |
if lang not in ["en", "uk"]:
|
22 |
lang = "uk"
|
23 |
if lang == "uk":
|
24 |
+
model = uk_model
|
25 |
if lang == "en":
|
26 |
+
model = en_model
|
27 |
+
|
28 |
+
ds = model
|
29 |
# sphinx-doc: python_ref_model_stop
|
30 |
model_load_end = timer() - model_load_start
|
31 |
print('Loaded model in {:.3}s.'.format(model_load_end), file=sys.stderr)
|