Update audio2text/a2t.py
Browse files- audio2text/a2t.py +9 -11
audio2text/a2t.py
CHANGED
@@ -3,7 +3,7 @@ import numpy as np
|
|
3 |
from .init import pipe
|
4 |
|
5 |
TASK = "transcribe"
|
6 |
-
BATCH_SIZE =
|
7 |
|
8 |
class A2T:
|
9 |
def __init__(self, mic):
|
@@ -13,32 +13,30 @@ class A2T:
|
|
13 |
if inputs is None:
|
14 |
print("Inputs None")
|
15 |
|
16 |
-
transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task
|
17 |
print("transcribed_text : ", transcribed_text)
|
18 |
return transcribed_text["text"]
|
19 |
|
20 |
def __preprocces(self, raw: np.ndarray):
|
21 |
-
chunk = raw.astype(np.float32
|
22 |
return chunk
|
23 |
|
24 |
def predict(self):
|
25 |
try:
|
26 |
if self.mic is not None:
|
27 |
chunk = self.mic.get_array_of_samples()
|
28 |
-
chunk = np.array(chunk)
|
29 |
audio = self.__preprocces(chunk)
|
30 |
-
|
31 |
-
print(f"audio : {audio} \n frame_rate : {sampling_rate} shape : {audio.shape}")
|
32 |
else:
|
33 |
-
|
34 |
|
35 |
if isinstance(audio , np.ndarray):
|
36 |
-
inputs = {"sampling_rate":
|
37 |
return self.__transcribe(inputs=inputs, task=TASK)
|
38 |
else:
|
39 |
-
|
40 |
|
41 |
except Exception as e:
|
42 |
-
|
43 |
-
return "Oops some kinda error"
|
44 |
|
|
|
3 |
from .init import pipe
|
4 |
|
5 |
TASK = "transcribe"
|
6 |
+
BATCH_SIZE = 8
|
7 |
|
8 |
class A2T:
|
9 |
def __init__(self, mic):
|
|
|
13 |
if inputs is None:
|
14 |
print("Inputs None")
|
15 |
|
16 |
+
transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task, "language": "|en|"})
|
17 |
print("transcribed_text : ", transcribed_text)
|
18 |
return transcribed_text["text"]
|
19 |
|
20 |
def __preprocces(self, raw: np.ndarray):
    """Convert raw samples to float32 and scale them by 1 / 32768.

    Args:
        raw: Array of audio samples. ``predict`` passes an int16 array,
            for which the result lies in [-1.0, 1.0).

    Returns:
        A new float32 array with the same shape as ``raw``.
    """
    # 32768 == 2**15, the magnitude of the most negative int16 sample.
    full_scale = 32768.0
    return raw.astype(np.float32) / full_scale
|
23 |
|
24 |
def predict(self):
    """Transcribe the audio held by ``self.mic`` and return the text.

    Samples are read from ``self.mic.get_array_of_samples()``, turned into
    an int16 array, normalised by ``__preprocces`` and handed to
    ``__transcribe`` together with a sampling rate of 16000.

    Returns:
        Whatever ``__transcribe`` returns on success. On any exception
        (including a missing mic), the string
        ``"Oops some kinda error : <message>"`` is returned instead of
        raising.
    """
    # NOTE(review): the sampling rate is hard-coded to 16000 and the sample
    # dtype to int16; the mic's actual rate/width is not consulted here —
    # confirm the caller always supplies 16 kHz, 16-bit audio.
    try:
        if self.mic is None:
            raise Exception("please provide audio")

        samples = np.array(self.mic.get_array_of_samples(), dtype=np.int16)
        audio = self.__preprocces(samples)
        print(f"audio : {audio} \n shape : {audio.shape} \n max : {np.max(audio)}")

        if not isinstance(audio, np.ndarray):
            raise Exception("Audio is not np array")

        payload = {"sampling_rate": 16000, "raw": audio}
        return self.__transcribe(inputs=payload, task=TASK)
    except Exception as e:
        # Top-level boundary: report the failure as text rather than raising.
        return f"Oops some kinda error : {e}"
|
|
|
42 |
|