Update audio2text/a2t.py
audio2text/a2t.py  +4 -4
@@ -10,18 +10,18 @@ class A2T:
     def __init__(self, mic):
         self.mic = mic
 
-    def __transcribe(self, inputs, task: str = None):
+    def __transcribe(self, inputs, task: str = None, lang: str = "english"):
         if inputs is None:
             print("Inputs None")
 
-        transcribed_text = pipe(inputs, batch_size=BATCH_SIZE,)["text"]
+        transcribed_text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task, "language": lang}, return_timestamps=True)["text"]
         return transcribed_text
 
     def __preprocces(self, raw: np.ndarray, sampling_rate: int):
         chunk = raw.astype(np.float32) / 32678.0
 
-        if sampling_rate != 16000:
-            chunk = librosa.resample(chunk, orig_sr=sampling_rate, target_sr=16000)
+        # if sampling_rate != 16000:
+        #     chunk = librosa.resample(chunk, orig_sr=sampling_rate, target_sr=16000)
 
         # chunk = chunk[:16000*LIMIT]
 
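For context, a minimal sketch of how the updated __transcribe call might run end to end. `pipe` and `BATCH_SIZE` are defined elsewhere in a2t.py and are not shown in this hunk; the sketch assumes `pipe` is a Hugging Face transformers automatic-speech-recognition pipeline built on a Whisper checkpoint, since generate_kwargs={"task": ..., "language": ...} and return_timestamps=True match that API. The model name, batch size, and the standalone transcribe() wrapper below are illustrative placeholders, not the repository's actual values.

# Sketch only: assumes `pipe` is a transformers ASR pipeline (Whisper) and
# BATCH_SIZE is a module-level constant, as suggested by the diff above.
import numpy as np
from transformers import pipeline

BATCH_SIZE = 8  # assumed value for illustration
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")  # assumed checkpoint

def transcribe(inputs, task: str = "transcribe", lang: str = "english"):
    if inputs is None:
        print("Inputs None")
        return ""
    # `inputs` may be a dict of {"raw": float32 array, "sampling_rate": int},
    # mirroring what __preprocces produces from raw microphone samples.
    return pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task, "language": lang},
        return_timestamps=True,
    )["text"]

# Example: one second of silence at 16 kHz, normalized the way __preprocces does.
# (Full-scale int16 normalization is usually / 32768.0; the diff uses 32678.0.)
raw = np.zeros(16000, dtype=np.int16)
chunk = raw.astype(np.float32) / 32768.0
print(transcribe({"raw": chunk, "sampling_rate": 16000}))

Because the resampling branch is now commented out, this sketch feeds the pipeline 16 kHz audio directly; if the microphone captures at another rate, the librosa.resample line from the diff would need to be restored.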