Update audio2text/a2t.py
audio2text/a2t.py (+15 -7)
@@ -2,7 +2,7 @@ import numpy as np
 
 import librosa
 import torch
-from .init import
+from .init import pipe
 
 LIMIT = 90 # limit 90 seconds
 
@@ -37,6 +37,14 @@ class A2T:
         except Exception as e:
             print("Preprocces error", e)
             return None
+
+    def __transcribe(self, inputs, task: str = None):
+        if inputs is None:
+            print("Inputs None")
+
+        transcribed_text = pipe(inputs, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
+        return transcribed_text
+
 
     def predict(self):
         if self.mic is not None:
@@ -46,12 +54,12 @@
             return "please provide audio"
 
         try:
-            forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
-            # audio = self.__preprocces(audio=audio, frame_rate=frame_rate)
-            inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
-            predicted_ids = model.generate(**inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)
-            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-            return
+            # forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
+            # # audio = self.__preprocces(audio=audio, frame_rate=frame_rate)
+            # inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
+            # predicted_ids = model.generate(**inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)
+            # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+            return self.__transcribe(inputs=audio)
        except Exception as e:
             print("Predict error", e)
             return "Oops some kinda error"
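
The new import assumes that audio2text/init.py exposes a ready-made pipe object. That module is not part of this diff, so the following is only a minimal sketch of what it might contain, assuming a Hugging Face transformers automatic-speech-recognition pipeline; the checkpoint name openai/whisper-small, the chunk_length_s value, and the device selection are illustrative assumptions, not taken from the repository.

# audio2text/init.py -- illustrative sketch only, not the repository's actual module
import torch
from transformers import pipeline

# Pick a GPU if one is available; otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Whisper-style ASR pipeline; model name and chunk length are assumptions.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
    device=device,
)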
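
For reference, the pipeline call added in __transcribe can be exercised on its own as below. The file name sample.wav is a placeholder; pipe accepts a raw NumPy array of 16 kHz mono samples, which matches what predict passes in after preprocessing, and return_timestamps=True mirrors the diff so clips longer than a single 30-second Whisper window can still be transcribed.

import librosa

# Load and resample a local file to 16 kHz mono, as Whisper-based pipelines expect.
# "sample.wav" is a placeholder path.
audio, _ = librosa.load("sample.wav", sr=16000, mono=True)

# Same call shape as in __transcribe: force the transcription task and keep
# timestamps so long-form audio can be stitched together.
result = pipe(audio, generate_kwargs={"task": "transcribe"}, return_timestamps=True)
print(result["text"])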
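
The lines commented out in predict correspond to the lower-level processor/model path that the pipeline call replaces. For comparison, a self-contained sketch of that approach is shown below; it assumes WhisperProcessor and WhisperForConditionalGeneration from transformers and the same assumed openai/whisper-small checkpoint, since the model objects the repository actually constructs are not shown in this diff.

import librosa
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Assumed checkpoint; the repository may use a different one.
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

# Placeholder path, resampled to the 16 kHz mono input Whisper expects.
audio, _ = librosa.load("sample.wav", sr=16000, mono=True)

# Force English transcription and build input features, as in the removed code.
forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    predicted_ids = model.generate(**inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)

transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
print(transcription[0])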