CineAI committed
Commit e18fb9d · verified · 1 Parent(s): 33273c7

Update audio2text/a2t.py

Files changed (1)
  1. audio2text/a2t.py +15 -7
audio2text/a2t.py CHANGED
@@ -2,7 +2,7 @@ import numpy as np
 
 import librosa
 import torch
-from .init import processor, model
+from .init import pipe
 
 LIMIT = 90 # limit 90 seconds
 
@@ -37,6 +37,14 @@ class A2T:
         except Exception as e:
             print("Preprocces error", e)
             return None
+
+    def __transcribe(self, inputs, task: str = None):
+        if inputs is None:
+            print("Inputs None")
+
+        transcribed_text = pipe(inputs, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
+        return transcribed_text
+
 
     def predict(self):
         if self.mic is not None:
@@ -46,12 +54,12 @@
             return "please provide audio"
 
         try:
-            forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
-            # audio = self.__preprocces(audio=audio, frame_rate=frame_rate)
-            inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
-            predicted_ids = model.generate(**inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)
-            transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-            return transcription[0]
+            # forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
+            # # audio = self.__preprocces(audio=audio, frame_rate=frame_rate)
+            # inputs = processor(audio=audio, sampling_rate=16000, return_tensors="pt")
+            # predicted_ids = model.generate(**inputs, max_length=400, forced_decoder_ids=forced_decoder_ids)
+            # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+            return __transcribe(inputs=audio)
         except Exception as e:
             print("Predict error", e)
             return "Oops some kinda error"